summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitignore79
-rw-r--r--AUTHORS4
-rw-r--r--COPYING3
-rw-r--r--Makefile.am3
-rw-r--r--ceph.spec.in136
-rw-r--r--debian/ceph-mds.postrm48
-rw-r--r--debian/ceph-test.install121
-rw-r--r--debian/ceph.dirs1
-rw-r--r--debian/ceph.install1
-rw-r--r--debian/ceph.postinst1
-rw-r--r--debian/ceph.postrm8
-rw-r--r--debian/ceph.prerm5
-rw-r--r--debian/control2
-rwxr-xr-xdebian/rules9
-rw-r--r--doc/.gitignore2
-rw-r--r--doc/cephfs/fstab.rst2
-rw-r--r--doc/cephfs/hadoop.rst96
-rw-r--r--doc/changelog/v0.56.3.txt562
-rw-r--r--doc/faq.rst325
-rw-r--r--doc/install/debian.rst4
-rw-r--r--doc/install/rpm.rst5
-rw-r--r--doc/rados/operations/auth-intro.rst2
-rw-r--r--doc/rados/operations/authentication.rst2
-rw-r--r--doc/rados/operations/operating.rst2
-rw-r--r--doc/rados/operations/troubleshooting-osd.rst12
-rw-r--r--doc/radosgw/manual-install.rst19
-rw-r--r--doc/rbd/libvirt.rst309
-rw-r--r--doc/release-notes.rst44
-rw-r--r--doc/start/quick-rgw.rst8
-rw-r--r--doc/start/quick-start.rst2
-rw-r--r--keys/autobuild.asc56
-rw-r--r--man/.gitignore1
-rwxr-xr-xqa/qa_scripts/RbdLib.pm58
-rwxr-xr-xqa/qa_scripts/rbd_cli_tests.pl84
-rw-r--r--qa/run_xfstests_qemu.sh3
-rwxr-xr-xqa/workunits/cls/test_cls_lock.sh2
-rwxr-xr-xqa/workunits/cls/test_cls_rbd.sh2
-rwxr-xr-xqa/workunits/cls/test_cls_refcount.sh2
-rwxr-xr-xqa/workunits/cls/test_cls_rgw.sh2
-rw-r--r--qa/workunits/direct_io/test_short_dio_read.c18
-rw-r--r--qa/workunits/direct_io/test_sync_io.c144
-rwxr-xr-xqa/workunits/hadoop-internal-tests/test.sh11
-rwxr-xr-xqa/workunits/hadoop-wordcount/test.sh47
-rwxr-xr-xqa/workunits/libcephfs/test.sh2
-rwxr-xr-xqa/workunits/mon/crush_ops.sh23
-rwxr-xr-xqa/workunits/mon/osd.sh2
-rwxr-xr-xqa/workunits/mon/workloadgen.sh25
-rwxr-xr-xqa/workunits/osdc/stress_objectcacher.sh2
-rwxr-xr-xqa/workunits/rados/stress_watch.sh2
-rwxr-xr-xqa/workunits/rados/test.sh24
-rwxr-xr-xqa/workunits/rados/test_python.sh4
-rwxr-xr-xqa/workunits/rbd/map-snapshot-io.sh1
-rwxr-xr-xqa/workunits/rbd/run_cli_tests.sh7
-rwxr-xr-xqa/workunits/rbd/smalliobench.sh2
-rwxr-xr-xqa/workunits/rbd/test_librbd.sh2
-rwxr-xr-xqa/workunits/rbd/test_librbd_python.sh4
-rw-r--r--src/.gitignore157
-rw-r--r--src/Makefile.am622
-rw-r--r--src/auth/cephx/CephxProtocol.h2
-rwxr-xr-xsrc/ceph-create-keys12
-rwxr-xr-xsrc/ceph-disk-activate317
-rwxr-xr-xsrc/ceph-disk-prepare725
-rw-r--r--src/ceph_authtool.cc6
-rw-r--r--src/ceph_common.sh67
-rw-r--r--src/client/Client.cc61
-rw-r--r--src/client/Client.h14
-rw-r--r--src/client/ClientSnapRealm.cc (renamed from src/client/SnapRealm.cc)2
-rw-r--r--src/client/ClientSnapRealm.h (renamed from src/client/SnapRealm.h)0
-rw-r--r--src/client/Inode.cc2
-rw-r--r--src/client/SyntheticClient.cc17
-rw-r--r--src/client/test_ioctls.c2
-rw-r--r--src/cls/lock/cls_lock.cc2
-rw-r--r--src/cls/rbd/cls_rbd.cc6
-rw-r--r--src/cls/refcount/cls_refcount.cc4
-rw-r--r--src/common/AsyncReserver.h2
-rw-r--r--src/common/DecayCounter.cc82
-rw-r--r--src/common/DecayCounter.h55
-rw-r--r--src/common/Mutex.cc1
-rw-r--r--src/common/Throttle.cc59
-rw-r--r--src/common/Throttle.h3
-rw-r--r--src/common/WorkQueue.cc2
-rw-r--r--src/common/WorkQueue.h6
-rw-r--r--src/common/buffer.cc16
-rw-r--r--src/common/ceph_crypto.cc1
-rw-r--r--src/common/config.h2
-rw-r--r--src/common/config_opts.h13
-rw-r--r--src/common/fiemap.cc9
-rw-r--r--src/common/obj_bencher.cc19
-rw-r--r--src/common/types.cc23
-rw-r--r--src/crush/CrushWrapper.cc75
-rw-r--r--src/crush/CrushWrapper.h14
-rw-r--r--src/crush/crush.c7
-rw-r--r--src/crush/crush.h1
-rw-r--r--src/crushtool.cc2
-rw-r--r--src/dupstore.cc2
-rw-r--r--src/gtest/.gitignore5
-rw-r--r--src/include/buffer.h9
-rw-r--r--src/include/ceph_features.h6
-rw-r--r--src/include/frag.h2
-rw-r--r--src/include/types.h2
-rw-r--r--src/include/xlist.h6
-rw-r--r--src/init-ceph.in18
-rw-r--r--src/key_value_store/cls_kvs.cc4
-rw-r--r--src/key_value_store/kv_flat_btree_async.cc6
-rw-r--r--src/libcephfs.cc7
-rw-r--r--src/librados/librados.cc16
-rw-r--r--src/librbd/internal.cc6
-rw-r--r--src/log/Entry.h2
-rw-r--r--src/logrotate.conf18
-rw-r--r--src/mds/Anchor.cc64
-rw-r--r--src/mds/Anchor.h31
-rw-r--r--src/mds/AnchorServer.cc24
-rw-r--r--src/mds/AnchorServer.h17
-rw-r--r--src/mds/CDentry.h2
-rw-r--r--src/mds/CDir.cc2
-rw-r--r--src/mds/CInode.cc62
-rw-r--r--src/mds/CInode.h85
-rw-r--r--src/mds/Capability.cc172
-rw-r--r--src/mds/Capability.h76
-rw-r--r--src/mds/InoTable.h8
-rw-r--r--src/mds/Locker.cc15
-rw-r--r--src/mds/LogEvent.cc19
-rw-r--r--src/mds/LogEvent.h12
-rw-r--r--src/mds/MDCache.cc15
-rw-r--r--src/mds/MDS.cc28
-rw-r--r--src/mds/MDSMap.cc243
-rw-r--r--src/mds/MDSMap.h165
-rw-r--r--src/mds/MDSTableServer.cc2
-rw-r--r--src/mds/MDSTableServer.h27
-rw-r--r--src/mds/Migrator.cc1
-rw-r--r--src/mds/Server.cc74
-rw-r--r--src/mds/Server.h2
-rw-r--r--src/mds/SessionMap.cc70
-rw-r--r--src/mds/SessionMap.h86
-rw-r--r--src/mds/SimpleLock.h8
-rw-r--r--src/mds/SnapRealm.cc488
-rw-r--r--src/mds/SnapRealm.h148
-rw-r--r--src/mds/SnapServer.cc2
-rw-r--r--src/mds/SnapServer.h10
-rw-r--r--src/mds/events/ECommitted.h19
-rw-r--r--src/mds/events/EExport.h25
-rw-r--r--src/mds/events/EFragment.h34
-rw-r--r--src/mds/events/EImportFinish.h24
-rw-r--r--src/mds/events/EImportStart.h28
-rw-r--r--src/mds/events/EMetaBlob.h245
-rw-r--r--src/mds/events/EOpen.h21
-rw-r--r--src/mds/events/EResetJournal.h17
-rw-r--r--src/mds/events/ESession.h28
-rw-r--r--src/mds/events/ESessions.h29
-rw-r--r--src/mds/events/ESlaveUpdate.h131
-rw-r--r--src/mds/events/EString.h57
-rw-r--r--src/mds/events/ESubtreeMap.h27
-rw-r--r--src/mds/events/ETableClient.h27
-rw-r--r--src/mds/events/ETableServer.h34
-rw-r--r--src/mds/events/EUpdate.h32
-rw-r--r--src/mds/inode_backtrace.cc105
-rw-r--r--src/mds/inode_backtrace.h40
-rw-r--r--src/mds/journal.cc1246
-rw-r--r--src/mds/mdstypes.cc892
-rw-r--r--src/mds/mdstypes.h523
-rw-r--r--src/mds/snap.cc564
-rw-r--r--src/mds/snap.h223
-rw-r--r--src/messages/MClientReconnect.h24
-rw-r--r--src/messages/MMDSMap.h4
-rw-r--r--src/messages/MOSDRepScrub.h5
-rw-r--r--src/mon/AuthMonitor.cc2
-rw-r--r--src/mon/MDSMonitor.cc52
-rw-r--r--src/mon/MDSMonitor.h9
-rw-r--r--src/mon/MonCaps.cc3
-rw-r--r--src/mon/Monitor.cc8
-rw-r--r--src/mon/Monitor.h14
-rw-r--r--src/mon/OSDMonitor.cc156
-rw-r--r--src/mon/OSDMonitor.h24
-rw-r--r--src/mon/PGMap.cc2
-rw-r--r--src/mon/PGMonitor.cc3
-rw-r--r--src/mon/PGMonitor.h6
-rw-r--r--src/monmaptool.cc4
-rw-r--r--src/msg/Message.h4
-rw-r--r--src/msg/Messenger.h11
-rw-r--r--src/msg/msg_types.h2
-rw-r--r--src/ocf/.gitignore2
-rw-r--r--src/ocf/rbd.in2
-rw-r--r--src/os/DBObjectMap.cc17
-rw-r--r--src/os/FileStore.cc92
-rw-r--r--src/os/FileStore.h4
-rw-r--r--src/os/HashIndex.cc2
-rw-r--r--src/os/LFNIndex.cc8
-rw-r--r--src/os/ObjectStore.cc13
-rw-r--r--src/os/ObjectStore.h4
-rw-r--r--src/osd/OSD.cc43
-rw-r--r--src/osd/OSD.h6
-rw-r--r--src/osd/OSDMap.cc9
-rw-r--r--src/osd/OSDMap.h1
-rw-r--r--src/osd/PG.cc568
-rw-r--r--src/osd/PG.h53
-rw-r--r--src/osd/ReplicatedPG.cc22
-rw-r--r--src/osd/osd_types.cc48
-rw-r--r--src/osd/osd_types.h7
-rw-r--r--src/osdc/ObjectCacher.cc2
-rw-r--r--src/osdc/Objecter.cc5
-rw-r--r--src/osdmaptool.cc2
-rw-r--r--src/psim.cc2
-rw-r--r--src/rados.cc4
-rw-r--r--src/rbd.cc18
-rw-r--r--src/rbd_fuse/rbd-fuse.c2
-rw-r--r--src/rgw/logrotate.conf24
-rw-r--r--src/rgw/rgw_acl_s3.cc21
-rw-r--r--src/rgw/rgw_acl_s3.h10
-rw-r--r--src/rgw/rgw_admin.cc15
-rw-r--r--src/rgw/rgw_common.h4
-rw-r--r--src/rgw/rgw_gc.cc2
-rw-r--r--src/rgw/rgw_log.cc7
-rw-r--r--src/rgw/rgw_main.cc14
-rw-r--r--src/rgw/rgw_op.cc106
-rw-r--r--src/rgw/rgw_op.h7
-rw-r--r--src/rgw/rgw_rados.cc418
-rw-r--r--src/rgw/rgw_rados.h26
-rw-r--r--src/rgw/rgw_rest.cc4
-rw-r--r--src/rgw/rgw_rest_s3.cc33
-rw-r--r--src/rgw/rgw_rest_s3.h3
-rw-r--r--src/rgw/rgw_rest_swift.cc6
-rw-r--r--src/rgw/rgw_rest_swift.h2
-rw-r--r--src/rgw/rgw_usage.cc2
-rw-r--r--src/rgw/rgw_user.cc4
-rw-r--r--src/rgw/rgw_xml.cc11
-rw-r--r--src/scratchtoolpp.cc5
-rw-r--r--src/test/ObjectMap/test_keyvaluedb_iterators.cc4
-rw-r--r--src/test/ObjectMap/test_object_map.cc4
-rw-r--r--src/test/bufferlist.cc1787
-rw-r--r--src/test/common/Throttle.cc256
-rw-r--r--src/test/crypto.cc12
-rw-r--r--src/test/encoding/types.h81
-rw-r--r--src/test/filestore/FileStoreTracker.cc6
-rw-r--r--src/test/filestore/TestFileStoreState.cc4
-rw-r--r--src/test/filestore/chain_xattr.cc217
-rwxr-xr-xsrc/test/filestore/run_seed_to.sh10
-rw-r--r--src/test/mon/test_mon_workloadgen.cc13
-rw-r--r--src/test/osd/RadosModel.h6
-rwxr-xr-xsrc/test/run-rbd-tests6
-rw-r--r--src/test/test_filejournal.cc4
-rw-r--r--src/test/test_mutate.cc2
-rw-r--r--src/tools/ceph-filestore-dump.cc22
-rw-r--r--src/tools/common.cc8
-rw-r--r--src/tools/rest_bench.cc4
-rwxr-xr-xsrc/unittest_bufferlist.sh19
-rw-r--r--src/upstart/ceph-hotplug.conf11
-rw-r--r--src/upstart/ceph-osd.conf41
-rw-r--r--udev/95-ceph-osd.rules21
-rw-r--r--wireshark/ceph/packet-ceph.c39
249 files changed, 11828 insertions, 3937 deletions
diff --git a/.gitignore b/.gitignore
index 502f0183260..a28d5158e05 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,64 +8,61 @@
*.tar.bz2
*.dsc
*.changes
-./config.*
*.gcda
*.gcov
*.gcno
+*.generated.dot
+*.la
+*.so
+*.swp
+*.swo
+*.tmp
+*.pyc
+*.pyo
+.cproject
.deps
-web/*.html
-gmon.out
-core.*
-vgcore.*
-src/Makefile
-Makefile.in
-/Makefile
-/man/Makefile
+.dirstamp
+.metadata
+.project
+.settings
aclocal.m4
+ar-lib
autom4te.cache
+build-stamp
+ceph.spec
+compile
+config.guess
config.log
config.status
+config.sub
configure
-stamp-h1
+configure-stamp
+core
+cscope.*
depcomp
+gmon.out
install-sh
-missing
-src/ceph_ver.h
-release
-*.la
-ceph.spec
-compile
-config.guess
-config.sub
libtool
ltmain.sh
-cscope.files
-cscope.out
-*.swp
-*.swo
-.metadata/
-/py-compile
-*.pyc
-*.pyo
-core
-/build-stamp
-/configure-stamp
-.settings
-.project
-.cproject
+Makefile.in
+missing
+py-compile
+release
+stamp-h1
+vgcore.*
+
+# specific local dir files
/build-doc
-/doc/object_store.png
-/src/test_*
-*.generated.dot
-src/ocf/ceph
-src/ocf/rbd
-src/omapbench
-src/kvstorebench
-ar-lib
+/config.*
+/Makefile
+/*.patch
# temporary directory used by e.g. "make distcheck", e.g. ceph-0.42
/ceph-[0-9]*/
# M4 Macro directory
m4/
-src/gtest/m4/
+
+# where is this from?
+web/*.html
+
diff --git a/AUTHORS b/AUTHORS
index 08f3b1ca729..289f54bf67b 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -13,3 +13,7 @@ Patience Warnick <patience@newdream.net>
Yehuda Sadeh-Weinraub <yehudasa@gmail.com>
Greg Farnum <gregf@hq.newdream.net>
+Contributors
+------------
+
+Loic Dachary <loic@dachary.org>
diff --git a/COPYING b/COPYING
index 20ab537172d..b374bdc1801 100644
--- a/COPYING
+++ b/COPYING
@@ -98,3 +98,6 @@ License:
+Files: src/test/common/Throttle.cc src/test/filestore/chain_xattr.cc
+Copyright: Copyright (C) 2013 Cloudwatt <libre.licensing@cloudwatt.com>
+License: LGPL2 or later
diff --git a/Makefile.am b/Makefile.am
index 3f4231438ad..adeb4e57728 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -9,7 +9,8 @@ EXTRA_DIST += \
src/test/run-cli-tests-maybe-unset-ccache \
src/test/cli \
src/test/downloads \
- udev/50-rbd.rules
+ udev/50-rbd.rules \
+ udev/95-ceph-osd.rules
all-local:
diff --git a/ceph.spec.in b/ceph.spec.in
index 7efb9889a74..4724dbb9e95 100644
--- a/ceph.spec.in
+++ b/ceph.spec.in
@@ -24,7 +24,6 @@ Source0: http://ceph.com/download/%{name}-%{version}.tar.bz2
Requires: librbd1 = %{version}-%{release}
Requires: librados2 = %{version}-%{release}
Requires: libcephfs1 = %{version}-%{release}
-Requires: perl
Requires: python
Requires(post): binutils
BuildRoot: %{_tmppath}/%{name}-%{version}-build
@@ -76,9 +75,6 @@ performance, reliability, and scalability.
Summary: Ceph fuse-based client
Group: System Environment/Base
Requires: %{name}
-Requires: fuse-libs
-Requires: libstdc++
-Requires: libuuid
BuildRequires: fuse-devel
%description fuse
FUSE based client for Ceph distributed network file system
@@ -87,9 +83,6 @@ FUSE based client for Ceph distributed network file system
Summary: Ceph fuse-based client
Group: System Environment/Base
Requires: %{name}
-Requires: fuse-libs
-Requires: libstdc++
-Requires: libuuid
BuildRequires: fuse-devel
%description -n rbd-fuse
FUSE based client to map Ceph rbd images to files
@@ -151,7 +144,6 @@ store using a simple file-like interface.
Summary: RADOS block device client library
Group: System Environment/Libraries
License: LGPL-2.0
-Requires: librados2 = %{version}-%{release}
%description -n librbd1
RBD is a block device striped across multiple distributed objects in
RADOS, a reliable, autonomic distributed object storage cluster
@@ -286,6 +278,7 @@ mkdir -p $RPM_BUILD_ROOT/usr/sbin
ln -sf ../../etc/init.d/ceph %{buildroot}/usr/sbin/rcceph
ln -sf ../../etc/init.d/ceph-radosgw %{buildroot}/usr/sbin/rcceph-radosgw
install -m 0644 -D src/logrotate.conf $RPM_BUILD_ROOT%{_sysconfdir}/logrotate.d/ceph
+install -m 0644 -D src/rgw/logrotate.conf $RPM_BUILD_ROOT%{_sysconfdir}/logrotate.d/radosgw
chmod 0644 $RPM_BUILD_ROOT%{_docdir}/ceph/sample.ceph.conf
chmod 0644 $RPM_BUILD_ROOT%{_docdir}/ceph/sample.fetch_config
mkdir -p $RPM_BUILD_ROOT%{_localstatedir}/lib/ceph/tmp/
@@ -295,6 +288,7 @@ mkdir -p $RPM_BUILD_ROOT%{_sysconfdir}/ceph/
# udev rules
install -D -m 644 udev/50-rbd.rules $RPM_BUILD_ROOT/lib/udev/rules.d/50-rbd.rules
+install -D -m 644 udev/95-ceph-osd.rules $RPM_BUILD_ROOT/lib/udev/rules.d/95-ceph-osd.rules
%clean
rm -rf $RPM_BUILD_ROOT
@@ -360,6 +354,7 @@ fi
%config %{_sysconfdir}/bash_completion.d/radosgw-admin
%config %{_sysconfdir}/bash_completion.d/rbd
%config(noreplace) %{_sysconfdir}/logrotate.d/ceph
+%config(noreplace) %{_sysconfdir}/logrotate.d/radosgw
%{_mandir}/man8/ceph-mon.8*
%{_mandir}/man8/ceph-mds.8*
%{_mandir}/man8/ceph-osd.8*
@@ -396,6 +391,7 @@ fi
/sbin/ceph-disk-activate
/sbin/ceph-disk-prepare
/sbin/ceph-create-keys
+/lib/udev/rules.d/95-ceph-osd.rules
#################################################################################
%files fuse
@@ -516,68 +512,68 @@ fi
#################################################################################
%files -n ceph-test
%defattr(-,root,root,-)
-%{_bindir}/bench_log
-%{_bindir}/dupstore
-%{_bindir}/kvstorebench
-%{_bindir}/multi_stress_watch
-%{_bindir}/omapbench
-%{_bindir}/psim
-%{_bindir}/radosacl
-%{_bindir}/rest-bench
-%{_bindir}/rgw_jsonparser
-%{_bindir}/rgw_multiparser
-%{_bindir}/scratchtool
-%{_bindir}/scratchtoolpp
-%{_bindir}/smalliobench
-%{_bindir}/smalliobenchdumb
-%{_bindir}/smalliobenchfs
-%{_bindir}/smalliobenchrbd
-%{_bindir}/ceph-filestore-dump
-%{_bindir}/streamtest
-%{_bindir}/test_cfuse_cache_invalidate
-%{_bindir}/test_cls_lock
-%{_bindir}/test_cls_rbd
-%{_bindir}/test_cls_refcount
-%{_bindir}/test_cls_rgw
-%{_bindir}/test_filejournal
-%{_bindir}/test_filestore
-%{_bindir}/test_filestore_idempotent
-%{_bindir}/test_filestore_idempotent_sequence
-%{_bindir}/test_filestore_workloadgen
-%{_bindir}/test_ioctls
-%{_bindir}/test_keyvaluedb_atomicity
-%{_bindir}/test_keyvaluedb_iterators
-%{_bindir}/test_libcephfs
-%{_bindir}/test_librbd
-%{_bindir}/test_librbd_fsx
-%{_bindir}/test_mon_workloadgen
-%{_bindir}/test_mutate
-%{_bindir}/test_object_map
-%{_bindir}/test_objectcacher_stress
-%{_bindir}/test_rados_api_aio
-%{_bindir}/test_rados_api_cls
-%{_bindir}/test_rados_api_io
-%{_bindir}/test_rados_api_list
-%{_bindir}/test_rados_api_misc
-%{_bindir}/test_rados_api_pool
-%{_bindir}/test_rados_api_snapshots
-%{_bindir}/test_rados_api_stat
-%{_bindir}/test_rados_api_watch_notify
-%{_bindir}/test_rewrite_latency
-%{_bindir}/test_stress_watch
-%{_bindir}/test_trans
-%{_bindir}/testcrypto
-%{_bindir}/testkeys
-%{_bindir}/testmsgr
-%{_bindir}/testrados
-%{_bindir}/testrados_delete_pools_parallel
-%{_bindir}/testrados_list_parallel
-%{_bindir}/testrados_open_pools_parallel
-%{_bindir}/testrados_watch_notify
-%{_bindir}/testsignal_handlers
-%{_bindir}/testtimers
-%{_bindir}/tpbench
-%{_bindir}/xattr_bench
+%{_bindir}/ceph_bench_log
+%{_bindir}/ceph_dupstore
+%{_bindir}/ceph_kvstorebench
+%{_bindir}/ceph_multi_stress_watch
+%{_bindir}/ceph_omapbench
+%{_bindir}/ceph_psim
+%{_bindir}/ceph_radosacl
+%{_bindir}/ceph_rgw_jsonparser
+%{_bindir}/ceph_rgw_multiparser
+%{_bindir}/ceph_scratchtool
+%{_bindir}/ceph_scratchtoolpp
+%{_bindir}/ceph_smalliobench
+%{_bindir}/ceph_smalliobenchdumb
+%{_bindir}/ceph_smalliobenchfs
+%{_bindir}/ceph_smalliobenchrbd
+%{_bindir}/ceph_filestore_dump
+%{_bindir}/ceph_streamtest
+%{_bindir}/ceph_test_cfuse_cache_invalidate
+%{_bindir}/ceph_test_cls_lock
+%{_bindir}/ceph_test_cls_rbd
+%{_bindir}/ceph_test_cls_refcount
+%{_bindir}/ceph_test_cls_rgw
+%{_bindir}/ceph_test_filejournal
+%{_bindir}/ceph_test_filestore
+%{_bindir}/ceph_test_filestore_idempotent
+%{_bindir}/ceph_test_filestore_idempotent_sequence
+%{_bindir}/ceph_test_filestore_workloadgen
+%{_bindir}/ceph_test_ioctls
+%{_bindir}/ceph_test_keyvaluedb_atomicity
+%{_bindir}/ceph_test_keyvaluedb_iterators
+%{_bindir}/ceph_test_libcephfs
+%{_bindir}/ceph_test_librbd
+%{_bindir}/ceph_test_librbd_fsx
+%{_bindir}/ceph_test_mon_workloadgen
+%{_bindir}/ceph_test_mutate
+%{_bindir}/ceph_test_object_map
+%{_bindir}/ceph_test_objectcacher_stress
+%{_bindir}/ceph_test_rados_api_aio
+%{_bindir}/ceph_test_rados_api_cls
+%{_bindir}/ceph_test_rados_api_io
+%{_bindir}/ceph_test_rados_api_list
+%{_bindir}/ceph_test_rados_api_misc
+%{_bindir}/ceph_test_rados_api_pool
+%{_bindir}/ceph_test_rados_api_snapshots
+%{_bindir}/ceph_test_rados_api_stat
+%{_bindir}/ceph_test_rados_api_watch_notify
+%{_bindir}/ceph_test_rewrite_latency
+%{_bindir}/ceph_test_stress_watch
+%{_bindir}/ceph_test_trans
+%{_bindir}/ceph_test_crypto
+%{_bindir}/ceph_test_keys
+%{_bindir}/ceph_test_msgr
+%{_bindir}/ceph_test_rados
+%{_bindir}/ceph_test_rados_delete_pools_parallel
+%{_bindir}/ceph_test_rados_list_parallel
+%{_bindir}/ceph_test_rados_open_pools_parallel
+%{_bindir}/ceph_test_rados_watch_notify
+%{_bindir}/ceph_test_signal_handlers
+%{_bindir}/ceph_test_timers
+%{_bindir}/ceph_tpbench
+%{_bindir}/ceph_xattr_bench
+%{_bindir}/ceph-coverage
%files -n libcephfs_jni1
%defattr(-,root,root,-)
diff --git a/debian/ceph-mds.postrm b/debian/ceph-mds.postrm
new file mode 100644
index 00000000000..a400f726a1c
--- /dev/null
+++ b/debian/ceph-mds.postrm
@@ -0,0 +1,48 @@
+#!/bin/sh
+# postrm script for ceph-mds
+#
+# see: dh_installdeb(1)
+
+set -e
+
+# summary of how this script can be called:
+# * <postrm> `remove'
+# * <postrm> `purge'
+# * <old-postrm> `upgrade' <new-version>
+# * <new-postrm> `failed-upgrade' <old-version>
+# * <new-postrm> `abort-install'
+# * <new-postrm> `abort-install' <old-version>
+# * <new-postrm> `abort-upgrade' <old-version>
+# * <disappearer's-postrm> `disappear' <overwriter>
+# <overwriter-version>
+# for details, see http://www.debian.org/doc/debian-policy/ or
+# the debian-policy package
+
+
+case "$1" in
+ remove)
+ ;;
+
+ purge)
+ rm -rf --one-file-system -- /var/lib/ceph/mds || true
+ if [ -d /var/lib/ceph/mds ]; then
+ find /var/lib/ceph/mds -mindepth 1 -maxdepth 1 -type d -exec umount \{\} \;
+ fi
+ rm -rf --one-file-system -- /var/lib/ceph/mds
+ ;;
+
+ upgrade|failed-upgrade|abort-install|abort-upgrade|disappear)
+ ;;
+
+ *)
+ echo "postrm called with unknown argument \`$1'" >&2
+ exit 1
+ ;;
+esac
+
+# dh_installdeb will replace this with shell code automatically
+# generated by other debhelper scripts.
+
+#DEBHELPER#
+
+exit 0
diff --git a/debian/ceph-test.install b/debian/ceph-test.install
index 7bda9edc5af..1aba361ee9a 100644
--- a/debian/ceph-test.install
+++ b/debian/ceph-test.install
@@ -1,59 +1,62 @@
-usr/bin/bench_log
-usr/bin/dupstore
-usr/bin/kvstorebench
-usr/bin/multi_stress_watch
-usr/bin/omapbench
-usr/bin/psim
-usr/bin/radosacl
-usr/bin/rest-bench
-usr/bin/rgw_multiparser
-usr/bin/scratchtool
-usr/bin/scratchtoolpp
-usr/bin/smalliobench
-usr/bin/smalliobenchdumb
-usr/bin/smalliobenchfs
-usr/bin/smalliobenchrbd
-usr/bin/ceph-filestore-dump
-usr/bin/streamtest
-usr/bin/test_cfuse_cache_invalidate
-usr/bin/test_cls_lock
-usr/bin/test_cls_rbd
-usr/bin/test_cls_refcount
-usr/bin/test_cls_rgw
-usr/bin/test_filejournal
-usr/bin/test_filestore
-usr/bin/test_filestore_idempotent
-usr/bin/test_filestore_idempotent_sequence
-usr/bin/test_filestore_workloadgen
-usr/bin/test_ioctls
-usr/bin/test_keyvaluedb_atomicity
-usr/bin/test_keyvaluedb_iterators
-usr/bin/test_libcephfs
-usr/bin/test_librbd
-usr/bin/test_librbd_fsx
-usr/bin/test_mutate
-usr/bin/test_object_map
-usr/bin/test_rados_api_aio
-usr/bin/test_rados_api_cls
-usr/bin/test_rados_api_io
-usr/bin/test_rados_api_list
-usr/bin/test_rados_api_misc
-usr/bin/test_rados_api_pool
-usr/bin/test_rados_api_snapshots
-usr/bin/test_rados_api_stat
-usr/bin/test_rados_api_watch_notify
-usr/bin/test_rewrite_latency
-usr/bin/test_stress_watch
-usr/bin/test_trans
-usr/bin/testcrypto
-usr/bin/testkeys
-usr/bin/testmsgr
-usr/bin/testrados
-usr/bin/testrados_delete_pools_parallel
-usr/bin/testrados_list_parallel
-usr/bin/testrados_open_pools_parallel
-usr/bin/testrados_watch_notify
-usr/bin/testsignal_handlers
-usr/bin/testtimers
-usr/bin/tpbench
-usr/bin/xattr_bench
+usr/bin/ceph_bench_log
+usr/bin/ceph_dupstore
+usr/bin/ceph_kvstorebench
+usr/bin/ceph_multi_stress_watch
+usr/bin/ceph_omapbench
+usr/bin/ceph_psim
+usr/bin/ceph_radosacl
+usr/bin/ceph_rgw_multiparser
+usr/bin/ceph_rgw_jsonparser
+usr/bin/ceph_scratchtool
+usr/bin/ceph_scratchtoolpp
+usr/bin/ceph_smalliobench
+usr/bin/ceph_smalliobenchdumb
+usr/bin/ceph_smalliobenchfs
+usr/bin/ceph_smalliobenchrbd
+usr/bin/ceph_filestore_dump
+usr/bin/ceph_streamtest
+usr/bin/ceph_test_cfuse_cache_invalidate
+usr/bin/ceph_test_cls_lock
+usr/bin/ceph_test_cls_rbd
+usr/bin/ceph_test_cls_refcount
+usr/bin/ceph_test_cls_rgw
+usr/bin/ceph_test_filejournal
+usr/bin/ceph_test_filestore
+usr/bin/ceph_test_filestore_idempotent
+usr/bin/ceph_test_filestore_idempotent_sequence
+usr/bin/ceph_test_filestore_workloadgen
+usr/bin/ceph_test_ioctls
+usr/bin/ceph_test_keyvaluedb_atomicity
+usr/bin/ceph_test_keyvaluedb_iterators
+usr/bin/ceph_test_libcephfs
+usr/bin/ceph_test_librbd
+usr/bin/ceph_test_librbd_fsx
+usr/bin/ceph_test_mon_workloadgen
+usr/bin/ceph_test_mutate
+usr/bin/ceph_test_object_map
+usr/bin/ceph_test_objectcacher_stress
+usr/bin/ceph_test_rados_api_aio
+usr/bin/ceph_test_rados_api_cls
+usr/bin/ceph_test_rados_api_io
+usr/bin/ceph_test_rados_api_list
+usr/bin/ceph_test_rados_api_misc
+usr/bin/ceph_test_rados_api_pool
+usr/bin/ceph_test_rados_api_snapshots
+usr/bin/ceph_test_rados_api_stat
+usr/bin/ceph_test_rados_api_watch_notify
+usr/bin/ceph_test_rewrite_latency
+usr/bin/ceph_test_stress_watch
+usr/bin/ceph_test_trans
+usr/bin/ceph_test_crypto
+usr/bin/ceph_test_keys
+usr/bin/ceph_test_msgr
+usr/bin/ceph_test_rados
+usr/bin/ceph_test_rados_delete_pools_parallel
+usr/bin/ceph_test_rados_list_parallel
+usr/bin/ceph_test_rados_open_pools_parallel
+usr/bin/ceph_test_rados_watch_notify
+usr/bin/ceph_test_signal_handlers
+usr/bin/ceph_test_timers
+usr/bin/ceph_tpbench
+usr/bin/ceph_xattr_bench
+usr/bin/ceph-coverage
diff --git a/debian/ceph.dirs b/debian/ceph.dirs
index b9b8a21816f..ca7a880636c 100644
--- a/debian/ceph.dirs
+++ b/debian/ceph.dirs
@@ -5,3 +5,4 @@ var/lib/ceph/mon
var/lib/ceph/osd
var/lib/ceph/mds
var/lib/ceph/bootstrap-osd
+var/lib/ceph/bootstrap-mds
diff --git a/debian/ceph.install b/debian/ceph.install
index da097b24c86..fb70d9b9380 100644
--- a/debian/ceph.install
+++ b/debian/ceph.install
@@ -24,3 +24,4 @@ usr/share/man/man8/monmaptool.8
usr/share/man/man8/ceph-clsinfo.8
usr/share/man/man8/ceph-debugpack.8
etc/bash_completion.d/ceph
+lib/udev/rules.d/95-ceph-osd.rules
diff --git a/debian/ceph.postinst b/debian/ceph.postinst
index 1f9469d8f6c..4edbf10d93b 100644
--- a/debian/ceph.postinst
+++ b/debian/ceph.postinst
@@ -27,6 +27,7 @@ set -e
case "$1" in
configure)
rm -f /etc/init/ceph.conf
+ start ceph-all || :
;;
abort-upgrade|abort-remove|abort-deconfigure)
:
diff --git a/debian/ceph.postrm b/debian/ceph.postrm
index e387d5a8bec..7690fcea1b9 100644
--- a/debian/ceph.postrm
+++ b/debian/ceph.postrm
@@ -25,6 +25,14 @@ case "$1" in
purge)
rm -rf /var/log/ceph
+ rm -rf /etc/ceph
+
+ # be a little careful, here: unmount anything beneath here before removing it.
+ rm -rf --one-file-system -- /var/lib/ceph || true
+ if [ -d /var/lib/ceph ]; then
+ find /var/lib/ceph -mindepth 1 -maxdepth 2 -type d -exec umount \{\} \;
+ fi
+ rm -rf --one-file-system -- /var/lib/ceph
;;
upgrade|failed-upgrade|abort-install|abort-upgrade|disappear)
diff --git a/debian/ceph.prerm b/debian/ceph.prerm
new file mode 100644
index 00000000000..159a96e33c3
--- /dev/null
+++ b/debian/ceph.prerm
@@ -0,0 +1,5 @@
+#!/bin/sh
+
+stop ceph-all || :
+
+exit 0
\ No newline at end of file
diff --git a/debian/control b/debian/control
index 5f71995a932..e79cbbd2292 100644
--- a/debian/control
+++ b/debian/control
@@ -11,7 +11,7 @@ Standards-Version: 3.9.3
Package: ceph
Architecture: linux-any
-Depends: ${shlibs:Depends}, ${misc:Depends}, sdparm | hdparm, binutils, ceph-common, uuid-runtime, python, xfsprogs, perl
+Depends: ${shlibs:Depends}, ${misc:Depends}, sdparm | hdparm, binutils, ceph-common, uuid-runtime, python, xfsprogs
Recommends: ceph-mds, librados2, librbd1, btrfs-tools, gdisk, parted
Description: distributed storage and file system
Ceph is a distributed storage system designed to provide excellent
diff --git a/debian/rules b/debian/rules
index d35186402cd..2e5c22eacae 100755
--- a/debian/rules
+++ b/debian/rules
@@ -49,7 +49,8 @@ configure-stamp:
dh_testdir
./autogen.sh
./configure --prefix=/usr --sbindir=/sbin --localstatedir=/var \
- --sysconfdir=/etc $(extraopts) $(confflags)
+ --sysconfdir=/etc $(extraopts) $(confflags) \
+ $(CEPH_EXTRA_CONFIGURE_ARGS)
touch $@
build-arch: build
@@ -64,6 +65,7 @@ build-stamp: configure-stamp
cp src/init-ceph debian/ceph.init
cp src/init-radosgw debian/radosgw.init
cp src/logrotate.conf debian/ceph.logrotate
+ cp src/rgw/logrotate.conf debian/radosgw.logrotate
touch $@
@@ -77,7 +79,7 @@ clean:
ltmain.sh missing
rm -f configure Makefile.in man/Makefile.in src/Makefile.in
rm -f src/acconfig.h.in
- rm -f debian/ceph.init debian/radosgw.init debian/ceph.logrotate
+ rm -f debian/ceph.init debian/radosgw.init debian/ceph.logrotate debian/radosgw.logrotate
dh_clean
@@ -90,6 +92,7 @@ install: build
$(MAKE) DESTDIR=$(DESTDIR) install
sed -i "/dependency_libs/ s/'.*'/''/" `find . -name '*.la'`
install -D -m 644 udev/50-rbd.rules $(DESTDIR)/lib/udev/rules.d/50-rbd.rules
+ install -D -m 644 udev/95-ceph-osd.rules $(DESTDIR)/lib/udev/rules.d/95-ceph-osd.rules
# Add here commands to install the package into debian/testpack.
# Build architecture-independent files here.
@@ -127,6 +130,8 @@ binary-arch: build install
# per package, so do this ourselves
install -d -m0755 debian/ceph/etc/init
install -m0644 src/upstart/ceph*.conf debian/ceph/etc/init
+ install -d -m0755 debian/ceph-mds/etc/init
+ mv debian/ceph/etc/init/ceph-mds* debian/ceph-mds/etc/init
install -d -m0755 debian/radosgw/etc/init
install -m0644 src/upstart/radosgw*.conf debian/radosgw/etc/init
dh_installman -a
diff --git a/doc/.gitignore b/doc/.gitignore
index 295eda72a4c..0c7c74746ae 100644
--- a/doc/.gitignore
+++ b/doc/.gitignore
@@ -1,2 +1,2 @@
-*.tmp
/overview.png
+/object_store.png
diff --git a/doc/cephfs/fstab.rst b/doc/cephfs/fstab.rst
index 96093bf8ec3..b61cd1fcadf 100644
--- a/doc/cephfs/fstab.rst
+++ b/doc/cephfs/fstab.rst
@@ -10,7 +10,7 @@ following to ``/etc/fstab``::
For example::
- 10.10.10.10:6789:/ /mnt/ceph ceph name=admin,secretfile=/etc/ceph/secret.key,noauto,rw,noexec,nodev,noatime,nodiratime 0 2
+ 10.10.10.10:6789:/ /mnt/ceph ceph name=admin,secretfile=/etc/ceph/secret.key,noatime 0 2
.. important:: The ``name`` and ``secret`` or ``secretfile`` options are
mandatory when you have Ceph authentication running. See `Authentication`_
diff --git a/doc/cephfs/hadoop.rst b/doc/cephfs/hadoop.rst
index 7481b7f0d8a..625d46a0eec 100644
--- a/doc/cephfs/hadoop.rst
+++ b/doc/cephfs/hadoop.rst
@@ -3,7 +3,7 @@ Using Hadoop with CephFS
========================
Hadoop Configuration
---------------------
+====================
This section describes the Hadoop configuration options used to control Ceph.
These options are intended to be set in the Hadoop configuration file
@@ -36,8 +36,102 @@ These options are intended to be set in the Hadoop configuration file
| | | |
| | | |
+---------------------+--------------------------+----------------------------+
+|ceph.data.pools |List of Ceph data pools |Default value: default Ceph |
+| |for storing file. |pool. |
+| | | |
+| | | |
++---------------------+--------------------------+----------------------------+
|ceph.localize.reads |Allow reading from file |Default value: true |
| |replica objects | |
| | | |
| | | |
+---------------------+--------------------------+----------------------------+
+
+Support For Per-file Custom Replication
+---------------------------------------
+
+Hadoop users may specify a custom replication factor (e.g. 3 copies of each
+block) when creating a file. However, object replication factors are
+controlled on a per-pool basis in Ceph, and by default a Ceph file system will
+contain a pre-configured pool. In order to support per-file replication, Hadoop
+can be configured to select from alternative pools when creating new files.
+
+Additional data pools can be specified using the ``ceph.data.pools``
+configuration option. The value of the option is a comma separated list of
+pool names. The default Ceph pool will be used automatically if this
+configuration option is omitted or the value is empty. For example, the
+following configuration setting will consider the three pools listed. ::
+
+ <property>
+ <name>ceph.data.pools</name>
+ <value>pool1,pool2,pool5</value>
+ </property>
+
+Hadoop will not create pools automatically. In order to create a new pool with
+a specific replication factor, use the ``ceph osd pool create`` command, and then
+set the ``size`` property on the pool using the ``ceph osd pool set`` command. For
+more information on creating and configuring pools see the `RADOS Pool
+documentation`_.
+
+.. _RADOS Pool documentation: ../../rados/operations/pools
+
+Once a pool has been created and configured, the metadata service must be told
+that the new pool may be used to store file data. A pool can be made available
+for storing file system data using the ``ceph mds add_data_pool`` command.
+
+First, create the pool. In this example we create the ``hadoop1`` pool with
+replication factor 1. ::
+
+ ceph osd pool create hadoop1 100
+ ceph osd pool set hadoop1 size 1
+
+Next, determine the pool id. This can be done using the ``ceph osd dump``
+command. For example, we can look for the newly created ``hadoop1`` pool. ::
+
+ ceph osd dump | grep hadoop1
+
+The output should resemble::
+
+ pool 3 'hadoop1' rep size 1 min_size 1 crush_ruleset 0...
+
+where ``3`` is the pool id. Next we will use the pool id reference to register
+the pool as a data pool for storing file system data. ::
+
+ ceph mds add_data_pool 3
+
+The final step is to configure Hadoop to consider this data pool when
+selecting the target pool for new files. ::
+
+ <property>
+ <name>ceph.data.pools</name>
+ <value>hadoop1</value>
+ </property>
+
+Pool Selection Semantics
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+The following semantics describe the rules by which Hadoop will choose a pool
+given a desired replication factor and the set of pools specified using the
+``ceph.data.pools`` configuration option.
+
+1. When no custom pools are specified the default Ceph data pool is used.
+2. A custom pool with the same replication factor as the default Ceph data
+ pool will override the default.
+3. A pool with a replication factor that matches the desired replication will
+ be chosen if it exists.
+4. Otherwise, a pool with at least the desired replication factor will be
+ chosen, or the maximum possible.
+
+Debugging Pool Selection
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+Hadoop will produce a log file entry when it cannot determine the replication
+factor of a pool (e.g. it is not configured as a data pool). The log message
+will appear as follows::
+
+ Error looking up replication of pool: <pool name>
+
+Hadoop will also produce a log entry when it wasn't able to select an exact
+match for replication. This log entry will appear as follows::
+
+ selectDataPool path=<path> pool:repl=<name>:<value> wanted=<value>
diff --git a/doc/changelog/v0.56.3.txt b/doc/changelog/v0.56.3.txt
new file mode 100644
index 00000000000..c87675a96ba
--- /dev/null
+++ b/doc/changelog/v0.56.3.txt
@@ -0,0 +1,562 @@
+commit 6eb7e15a4783b122e9b0c85ea9ba064145958aa5
+Author: Gary Lowell <gary.lowell@inktank.com>
+Date: Wed Feb 13 10:10:20 2013 -0800
+
+ v0.56.3
+
+commit f5eb845a0f7a2c28d3a88a37479bcb34f882f40c
+Author: Yehuda Sadeh <yehuda@inktank.com>
+Date: Fri Feb 8 13:14:49 2013 -0800
+
+ rgw: change json formatting for swift list container
+
+ Fixes: #4048
+ There is some difference in the way swift formats the
+ xml output and the json output for list container. In
+ xml the entity is named 'name' and in json it is named
+ 'subdir'.
+
+ Signed-off-by: Yehuda Sadeh <yehuda@inktank.com>
+ (cherry picked from commit 3e4d79fe42dfc3ca70dc4d5d2aff5223f62eb34b)
+
+commit f21543f0d88f7bacb69cef3712b0ce087f386e93
+Author: Josh Durgin <josh.durgin@inktank.com>
+Date: Mon Feb 11 17:08:55 2013 -0800
+
+ librbd: unprotect any non-unprotected snapshot
+
+ Include snapshots in the UNPROTECTING state as well, which can occur
+ after an unprotect is interrupted.
+
+ Fixes: #4100
+ Backport: bobtail
+ Signed-off-by: Josh Durgin <josh.durgin@inktank.com>
+ Reviewed-by: Dan Mick <dan.mick@inktank.com>
+ (cherry picked from commit fe283813b44a7c45def6768ea0788a3a0635957e)
+
+commit 65969f8fbef02ee39f6c2365fffbcd3f633f4b37
+Author: Sage Weil <sage@inktank.com>
+Date: Fri Feb 8 21:36:13 2013 -0800
+
+ java: make CephMountTest use user.* xattr names
+
+ Changes to the xattr code in Ceph require
+ a few tweaks to existing test cases.
+ Specifically, there is now a ceph.file.layout
+ xattr by default and user defined xattrs
+ are prepended with "user."
+
+ Signed-off-by: Sage Weil <sage@inktank.com>
+ Reviewed-by: Joe Buck <jbbuck@gmail.com>
+ Reviewed-by: Noah Watkins <noahwatkins@gmail.com>
+
+commit 14fddc3ce85d3695aad9d3597f8f50dba5960a86
+Author: Sage Weil <sage@inktank.com>
+Date: Fri Feb 8 09:59:25 2013 -0800
+
+ mon: fix typo in C_Stats
+
+ Broken by previous commit.
+
+ Signed-off-by: Sage Weil <sage@inktank.com>
+ (cherry picked from commit 3cf3710be0b4cccc8de152a97be50d983c35116d)
+
+commit 0453140d187016a61950a8836da57f54d2c34602
+Author: Sage Weil <sage@inktank.com>
+Date: Thu Feb 7 23:13:11 2013 -0800
+
+ mon: retry PGStats message on EAGAIN
+
+ If we get EAGAIN from a paxos restart/election/whatever, we should
+ restart the message instead of just blindly acking it.
+
+ Signed-off-by: Sage Weil <sage@inktank.com>
+ Reviewed-by: Joao Luis <joao.luis@inktank.com>
+ (cherry picked from commit 4837063d447afb45554f55bb6fde1c97559acd4b)
+
+commit e68fcec78286363935cf731015108b9ea36b50a6
+Author: Sage Weil <sage@inktank.com>
+Date: Thu Feb 7 22:06:14 2013 -0800
+
+ mon: handle -EAGAIN in completion contexts
+
+ We can get ECANCELED, EAGAIN, or success out of the completion contexts,
+ but in the EAGAIN case (meaning there was an election) we were sending
+ a success to the client. This resulted in client hangs and all-around
+ confusion when the monitor cluster was thrashing.
+
+ Backport: bobtail
+ Signed-off-by: Sage Weil <sage@inktank.com>
+ Reviewed-by: Joao Luis <joao.luis@inktank.com>
+ (cherry picked from commit 17827769f1fe6d7c4838253fcec3b3a4ad288f41)
+
+commit 20ec490555728251444833520a40b20dc8015216
+Author: Sage Weil <sage@inktank.com>
+Date: Tue Feb 12 14:11:09 2013 -0800
+
+ osd: only share maps on hb connection of OSD_HBMSGS feature is set
+
+ Back in 1bc419a7affb056540ba8f9b332b6ff9380b37af we started sharing maps
+ with dead osds via the heartbeat connection, but old code will crash on an
+ unexpected message. Only do this if the OSD_HBMSGS feature is present.
+
+ Signed-off-by: Sage Weil <sage@inktank.com>
+ Reviewed-by: Samuel Just <sam.just@inktank.com>
+ (cherry picked from commit 302b26ff70ee5539da3dcb2e5614e2b7e83b9dcd)
+
+commit cbf63b633e7a59456f503af487fd4ad2607bbd76
+Author: Sage Weil <sage@inktank.com>
+Date: Tue Feb 12 14:10:51 2013 -0800
+
+ osd: tolerate unexpected messages on the heartbeat interface
+
+ We should note but not crash on unexpected messages. Announce this awesome
+ new "capability" via a feature bit.
+
+ Signed-off-by: Sage Weil <sage@inktank.com>
+ Reviewed-by: Samuel Just <sam.just@inktank.com>
+ (cherry picked from commit afda30aeaae0a65f83c6886658354ad2b57c4c43)
+
+ Conflicts:
+
+ src/include/ceph_features.h
+
+commit 102a519632f1b7a0fede9a3fbd4a5c1df0e732a5
+Merge: 2c6afa0 2ebf4d0
+Author: Sage Weil <sage@inktank.com>
+Date: Tue Feb 12 13:39:52 2013 -0800
+
+ Merge remote-tracking branch 'gh/wip-bobtail-osd-msgr' into bobtail
+
+commit 2c6afa058e8b1738c1400392320482945834de86
+Author: Sage Weil <sage@inktank.com>
+Date: Wed Jan 30 11:32:23 2013 -0800
+
+ test_libcephfs: fix xattr test
+
+ Ignore the ceph.*.layout xattrs.
+
+ Signed-off-by: Sage Weil <sage@inktank.com>
+ (cherry picked from commit b0d4dd21c7be86eb47728a4702a3c67ca44424ac)
+
+commit f11beb954976f66bfae75e847937f84958ebeaf3
+Author: Sage Weil <sage@inktank.com>
+Date: Thu Feb 7 22:51:29 2013 -0800
+
+ radosgw-admin: fix cli test
+
+ Signed-off-by: Sage Weil <sage@inktank.com>
+ (cherry picked from commit 1b05b0edbac09d1d7cf0da2e536829df05e48573)
+
+commit ec1085e534eb39d999775bebdcdb997f893a04ae
+Merge: 66d7758 62ed62f
+Author: Sage Weil <sage@inktank.com>
+Date: Thu Feb 7 23:25:30 2013 -0800
+
+ Merge remote-tracking branch 'gh/wip-bobtail-vxattrs' into bobtail
+
+commit 66d775858004d1d4e8a138b8d33a3799e03ce26e
+Author: Sage Weil <sage@inktank.com>
+Date: Mon Feb 4 09:14:39 2013 -0800
+
+ mon: enforce reweight be between 0..1
+
+ Signed-off-by: Sage Weil <sage@inktank.com>
+ Reviewed-by: Joao Luis <joao.luis@inktank.com>
+ (cherry picked from commit 4e29c95d6f61daa838888840cef0cceedc0fcfdd)
+
+commit 8bab3a1c3d0d2f619ddf885bb9050ad9a1c43517
+Author: Samuel Just <sam.just@inktank.com>
+Date: Thu Feb 7 10:38:00 2013 -0800
+
+ PG: dirty_info on handle_activate_map
+
+ We need to make sure the pg epoch is persisted during
+ activate_map.
+
+ Backport: bobtail
+ Reviewed-by: Sage Weil <sage@inktank.com>
+ Signed-off-by: Samuel Just <sam.just@inktank.com>
+ (cherry picked from commit dbce1d0dc919e221523bd44e1d0834711da1577d)
+
+commit dffa386bc13370c0ef56acf740b5200b2054980f
+Author: Sage Weil <sage@inktank.com>
+Date: Thu Feb 7 10:21:49 2013 -0800
+
+ osd: flush peering queue (consume maps) prior to boot
+
+ If the osd itself is behind on many maps during boot, it will get more and
+ (as part of that) flush the peering wq to ensure the pgs consume them.
+ However, it is possible for OSD to have latest/recnet maps, but pgs to be
+ behind, and to jump directly to boot and join. The OSD is then laggy and
+ unresponsive because the peering wq is way behind.
+
+ To avoid this, call consume_map() (kick the peering wq) at the end of
+ init and flush it to ensure we are *internally* all caught up before we
+ consider joining the cluster.
+
+ I'm pretty sure this is the root cause of #3905 and possibly #3995.
+
+ Signed-off-by: Sage Weil <sage@inktank.com>
+ Reviewed-by: Josh Durgin <josh.durgin@inktank.com>
+ Reviewed-by: Samuel Just <sam.just@inktank.com>
+ (cherry picked from commit af95d934b039d65d3667fc022e2ecaebba107b01)
+
+commit 47c9f46aac4afac37fb6ec72f0482e61f5e0d798
+Author: Yehuda Sadeh <yehuda@inktank.com>
+Date: Wed Feb 6 17:10:00 2013 -0800
+
+ rgw: a tool to fix clobbered bucket info in user's bucket list
+
+ This fixes bad entries in user's bucket list that may have occured
+ due to issue #4039. Syntax:
+
+ $ radosgw-admin user check --uid=<uid> [--fix]
+
+ Signed-off-by: Yehuda Sadeh <yehuda@inktank.com>
+ Reviewed-by: Greg Farnum <greg@inktank.com>
+ (cherry picked from commit 9cb6c33f0e2281b66cc690a28e08459f2e62ca13)
+
+ Conflicts:
+ src/rgw/rgw_admin.cc
+
+commit 6c8d63819fde1b6854f8fc03351465b420ff1bdc
+Author: Yehuda Sadeh <yehuda@inktank.com>
+Date: Wed Feb 6 16:43:48 2013 -0800
+
+ rgw: bucket recreation should not clobber bucket info
+
+ Fixes: #4039
+ User's list of buckets is getting modified even if bucket already
+ exists. This fix removes the newly created directory object, and
+ makes sure that user info's data points at the correct bucket.
+
+ Signed-off-by: Yehuda Sadeh <yehuda@inktank.com>
+ Reviewed-by: Greg Farnum <greg@inktank.com>
+ (cherry picked from commit 9d006ec40ced9d97b590ee07ca9171f0c9bec6e9)
+
+ Conflicts:
+ src/rgw/rgw_op.cc
+ src/rgw/rgw_rados.cc
+
+commit cc167914ac9603f87083c63f2cbc8dac9441329f
+Author: Yehuda Sadeh <yehuda@inktank.com>
+Date: Tue Feb 5 14:50:54 2013 -0800
+
+ rgw: a tool to fix buckets with leaked multipart references
+
+ Checks specified bucket for the #4011 symptoms, optionally fix
+ the issue.
+
+ sytax:
+ radosgw-admin bucket check --bucket=<bucket> [--fix]
+
+ Signed-off-by: Yehuda Sadeh <yehuda@inktank.com>
+ (cherry picked from commit 2d8faf8e5f15e833e6b556b0f3c4ac92e4a4151e)
+
+ Conflicts:
+ src/rgw/rgw_admin.cc
+ src/rgw/rgw_rados.h
+
+commit 4d6964fc7ddd23806e225c95bcb90ef93e4d23a1
+Author: Yehuda Sadeh <yehuda@inktank.com>
+Date: Tue Feb 5 13:54:11 2013 -0800
+
+ rgw: radosgw-admin object unlink
+
+ Add a radosgw-admin option to remove object from bucket index
+
+ Signed-off-by: Yehuda Sadeh <yehuda@inktank.com>
+ (cherry picked from commit 16235a7acb9543d60470170bb2a09956364626cd)
+
+ Conflicts:
+ src/rgw/rgw_admin.cc
+ src/rgw/rgw_rados.h
+ src/test/cli/radosgw-admin/help.t
+
+commit 2ebf4d065af3dc2e581a25b921071af3efb57f8a
+Author: Sage Weil <sage@inktank.com>
+Date: Fri Jan 25 09:30:00 2013 -0800
+
+ osd: kill unused addr-based send_map()
+
+ Not used, old API, bad.
+
+ Signed-off-by: Sage Weil <sage@inktank.com>
+ (cherry picked from commit e359a862199c8a94cb238f7271ba1b0edcc0863c)
+
+commit bac5b144b27f32da306161ae7018ccc337704121
+Author: Sage Weil <sage@inktank.com>
+Date: Fri Jan 25 09:29:37 2013 -0800
+
+ osd: share incoming maps via Connection*, not addrs
+
+ Kill a set of parallel methods that are using the old addr/inst-based
+ msgr APIs, and instead use Connection handles. This is much safer and gets
+ us closer to killing the old msgr API.
+
+ Signed-off-by: Sage Weil <sage@inktank.com>
+ (cherry picked from commit 5e2fab54a4fdf2f59e2b635cbddef8a5909acb7c)
+
+commit 9ca3a165ded62313ba153d7bab89dadf3f73999f
+Author: Sage Weil <sage@inktank.com>
+Date: Fri Jan 25 09:27:00 2013 -0800
+
+ osd: pass new maps to dead osds via existing Connection
+
+ Previously we were sending these maps to dead osds via their old addrs
+ using a new outgoing connection and setting the flags so that the msgr
+ would clean up. That mechanism is possibly buggy and fragile, and we can
+ avoid it entirely if we just reuse the existing heartbeat Connection.
+
+ Signed-off-by: Sage Weil <sage@inktank.com>
+ (cherry picked from commit 1bc419a7affb056540ba8f9b332b6ff9380b37af)
+
+commit 4cb28b6ed5a702fdac99b8ec71233ef7f877a7a2
+Author: Sage Weil <sage@inktank.com>
+Date: Fri Jan 25 09:25:28 2013 -0800
+
+ osd: requeue osdmaps on heartbeat connections for cluster connection
+
+ If we receive an OSDMap on the cluster connection, requeue it for the
+ cluster messenger, and process it there where we normally do. This avoids
+ any concerns about locking and ordering rules.
+
+ Signed-off-by: Sage Weil <sage@inktank.com>
+ (cherry picked from commit 76705ace2e9767939aa9acf5d9257c800f838854)
+
+commit e4f7ff8c288eac8a8b57382f11a4b6f93682315a
+Author: Sage Weil <sage@inktank.com>
+Date: Fri Jan 25 09:23:23 2013 -0800
+
+ msgr: add get_loopback_connection() method
+
+ Return the Connection* for ourselves, so we can queue messages for
+ ourselves.
+
+ Signed-off-by: Sage Weil <sage@inktank.com>
+ (cherry picked from commit a7059eb3f3922cf08c1e5bb5958acc2d45952482)
+
+commit 62ed62f5e2fb068cee38612d7974526aa1b3c759
+Author: Sage Weil <sage@inktank.com>
+Date: Sat Jan 19 11:33:04 2013 -0800
+
+ qa: add layout_vxattrs.sh test script
+
+ Test virtual xattrs for file and directory layouts.
+
+ TODO: create a data pool, add it to the fs, and make sure we can use it.
+
+ Signed-off-by: Sage Weil <sage@inktank.com>
+ (cherry picked from commit 61fbe27a52d12ecd98ddeb5fc0965c4f8ee7841a)
+
+commit d386622c3961a3b57eea42fdb82611cd2e904f4d
+Author: Sage Weil <sage@inktank.com>
+Date: Sat Jan 19 10:11:18 2013 -0800
+
+ mds: allow dir layout/policy to be removed via removexattr on ceph.dir.layout
+
+ This lets a user remove a policy that was previously set on a dir.
+
+ Signed-off-by: Sage Weil <sage@inktank.com>
+ (cherry picked from commit db31a1f9f27416e4d531fda716e32d42a275e84f)
+
+commit 6af5da7ae2c4ef95c16c6460770b6244d1aa1a6e
+Author: Sage Weil <sage@inktank.com>
+Date: Sat Jan 19 10:09:39 2013 -0800
+
+ mds: handle ceph.*.layout.* setxattr
+
+ Allow individual fields of file or dir layouts to be set via setxattr.
+
+ Signed-off-by: Sage Weil <sage@inktank.com>
+ (cherry picked from commit ebebf72f0993d028e795c78a986e1aee542ca5e0)
+
+commit c0af056eb9bdb62cfd8a6f9054a3a3c78c8e7447
+Author: Sage Weil <sage@inktank.com>
+Date: Mon Feb 4 22:03:32 2013 -0800
+
+ mdsmap: backported is_data_pool()
+
+ This roughly corresponds to mainline commit 99d9e1d.
+
+ Signed-off-by: Sage Weil <sage@inktank.com>
+
+commit 0407af4641ea19697f8feb0f48a92cde8dd4fbe4
+Author: Sage Weil <sage@inktank.com>
+Date: Sat Jan 19 10:04:05 2013 -0800
+
+ mds: fix client view of dir layout when layout is removed
+
+ We weren't handling the case where the projected node has NULL for the
+ layout properly. Fixes the client's view when we remove the dir layout.
+
+ Signed-off-by: Sage Weil <sage@inktank.com>
+ (cherry picked from commit 09f28541e374ffac198e4d48082b064aae93cb2c)
+
+commit 8ce834d3f50b00fdd59cd237f3fb5fef1d57e1dd
+Author: Sage Weil <sage@inktank.com>
+Date: Sat Jan 19 10:04:39 2013 -0800
+
+ client: note presence of dir layout in inode operator<<
+
+ Signed-off-by: Sage Weil <sage@inktank.com>
+ (cherry picked from commit 84751489ca208964e617516e04556722008ddf67)
+
+commit 99824b93cec93daaa0d536f031eb3b6180f94e3b
+Author: Sage Weil <sage@inktank.com>
+Date: Sat Jan 19 09:05:59 2013 -0800
+
+ client: list only aggregate xattr, but allow setting subfield xattrs
+
+ Signed-off-by: Sage Weil <sage@inktank.com>
+ (cherry picked from commit ba32ea9454d36072ec5ea3e6483dc3daf9199903)
+
+commit 809cff488ea1ffa299edd678ba6260993771bde3
+Author: Sage Weil <sage@inktank.com>
+Date: Fri Jan 18 22:26:00 2013 -0800
+
+ client: implement ceph.file.* and ceph.dir.* vxattrs
+
+ Display ceph.file.* vxattrs on any regular file, and ceph.dir.* vxattrs
+ on any directory that has a policy set.
+
+ Signed-off-by: Sage Weil <sage@inktank.com>
+ (cherry picked from commit 3f82912a891536dd7e930f98e28d9a8c18fab756)
+
+commit 13babca354d9fbe255de8bae9608a0c158bf6c40
+Author: Sage Weil <sage@inktank.com>
+Date: Fri Jan 18 17:21:37 2013 -0800
+
+ client: move xattr namespace enforcement into internal method
+
+ This captures libcephfs users now too.
+
+ Signed-off-by: Sage Weil <sage@inktank.com>
+ (cherry picked from commit febb96509559084357bfaabf7e4d28e494c274aa)
+
+commit 65ab51740175254ba3ee050f0fd97332dffe2eb7
+Author: Sage Weil <sage@inktank.com>
+Date: Fri Jan 18 17:20:22 2013 -0800
+
+ client: allow ceph.* xattrs
+
+ Signed-off-by: Sage Weil <sage@inktank.com>
+ (cherry picked from commit ad7ebad70bf810fde45067f78f316f130a243b9c)
+
+commit 6f3c1cd2cc07d951dfc23e523b9c6400b7c77c72
+Author: caleb miles <caselim@gmail.com>
+Date: Mon Jan 14 12:16:12 2013 -0500
+
+ rgw_rest: Make fallback uri configurable.
+
+ Some HTTP servers, notabily lighttp, do not set SCRIPT_URI, make the fallback
+ string configurable.
+
+ Signed-off-by: caleb miles <caleb.miles@inktank.com>
+ Reviewed-by: Yehuda Sadeh <yehuda@inktank.com>
+ (cherry picked from commit b3a2e7e955547a863d29566aab62bcc480e27a65)
+
+ Conflicts:
+ src/rgw/rgw_rest.cc
+
+commit f57d1b4c8cc4d08c6147423d7881be55ed2e88d9
+Author: Yehuda Sadeh <yehuda@inktank.com>
+Date: Fri Feb 1 10:56:11 2013 -0800
+
+ rgw: fix setting of NULL to string
+
+ Fixes: #3777
+ s->env->get() returns char * and not string and can return NULL.
+ Also, remove some old unused code.
+
+ Signed-off-by: Yehuda Sadeh <yehuda@inktank.com>
+ Reviewed-by: Greg Farnum <greg@inktank.com>
+ (cherry picked from commit 9019fbbe8f84f530b6a8700dfe99dfeb03e0ed3d)
+
+commit 55687240b2de20185524de07e67f42c3b1ae6592
+Author: Samuel Just <sam.just@inktank.com>
+Date: Fri Jan 11 10:44:04 2013 -0800
+
+ OSD: check for empty command in do_command
+
+ Fixes: #3878
+ Signed-off-by: Samuel Just <sam.just@inktank.com>
+ Reviewed-by: David Zafman <david.zafman@inktank.com>
+ (cherry picked from commit 8cf79f252a1bcea5713065390180a36f31d66dfd)
+
+commit c3468f76a5e68a6426f03e508d8ecf26950fca2a
+Author: Danny Al-Gaaf <danny.al-gaaf@bisect.de>
+Date: Wed Jan 30 18:52:24 2013 +0100
+
+ PGMap: fix -Wsign-compare warning
+
+ Fix -Wsign-compare compiler warning:
+
+ mon/PGMap.cc: In member function 'void PGMap::apply_incremental
+ (CephContext*, const PGMap::Incremental&)':
+ mon/PGMap.cc:247:30: warning: comparison between signed and
+ unsigned integer expressions [-Wsign-compare]
+
+ Signed-off-by: Danny Al-Gaaf <danny.al-gaaf@bisect.de>
+ (cherry picked from commit b571f8ee2d22a3894120204bc5f119ff37e1de53)
+
+commit 5a6b9af90f00d08ef97b34ee0b5abc7b0b63e72b
+Author: Sage Weil <sage@inktank.com>
+Date: Mon Jan 28 19:46:33 2013 -0800
+
+ mon: smooth pg stat rates over last N pgmaps
+
+ This smooths the recovery and throughput stats over the last N pgmaps,
+ defaulting to 2.
+
+ Signed-off-by: Sage Weil <sage@inktank.com>
+ (cherry picked from commit a7d15afb529615db56bae038b18b66e60d827a96)
+
+commit 7fd7a5eed19d5ab508d5fe11ff8734bc2bc8c565
+Author: Sage Weil <sage@inktank.com>
+Date: Fri Jan 25 19:51:40 2013 -0800
+
+ mon/PGMap: report IO rates
+
+ This does not appear to be very accurate; probably the stat values we're
+ displaying are not being calculated correctly.
+
+ Signed-off-by: Sage Weil <sage@inktank.com>
+ (cherry picked from commit 3f6837e022176ec4b530219043cf12e009d1ed6e)
+
+commit 7f149cf6730280f0e633d9f5ef3f0f95c5a5e430
+Author: Sage Weil <sage@inktank.com>
+Date: Fri Jan 25 19:51:14 2013 -0800
+
+ mon/PGMap: report recovery rates
+
+ Signed-off-by: Sage Weil <sage@inktank.com>
+ (cherry picked from commit 208b02a748d97378f312beaa5110d8630c853ced)
+
+commit 8d2d396c6d02bff72aca53920e9ac93fe91428d3
+Author: Sage Weil <sage@inktank.com>
+Date: Fri Jan 25 19:50:45 2013 -0800
+
+ mon/PGMap: include timestamp
+
+ Signed-off-by: Sage Weil <sage@inktank.com>
+ (cherry picked from commit 76e9fe5f06411eb0e96753dcd708dd6e43ab2c02)
+
+commit 8ab77bd4b510149f4df6b3134de0ef59272cec71
+Author: Sage Weil <sage@inktank.com>
+Date: Fri Jan 25 19:49:16 2013 -0800
+
+ osd: track recovery ops in stats
+
+ Signed-off-by: Sage Weil <sage@inktank.com>
+ (cherry picked from commit a2495f658c6d17f56ea0a2ab1043299a59a7115b)
+
+commit 8fd8534b4b808292a4b7c6b9f2f866c431cf9645
+Author: Sage Weil <sage@inktank.com>
+Date: Fri Jan 25 19:06:52 2013 -0800
+
+ osd_types: add recovery counts to object_sum_stats_t
+
+ Signed-off-by: Sage Weil <sage@inktank.com>
+ (cherry picked from commit 4aea19ee60fbe1106bdd71de2d172aa2941e8aab)
diff --git a/doc/faq.rst b/doc/faq.rst
index 351e396cb75..9777a272012 100644
--- a/doc/faq.rst
+++ b/doc/faq.rst
@@ -2,27 +2,308 @@
Frequently Asked Questions
============================
-These questions have been frequently asked on the ceph-devel mailing
-list, the IRC channel, and on the `Ceph.com`_ blog.
+These questions have been frequently asked on the ceph-users and ceph-devel
+mailing lists, the IRC channel, and on the `Ceph.com`_ blog.
.. _Ceph.com: http://ceph.com
+
Is Ceph Production-Quality?
===========================
Ceph's object store is production ready. Large-scale storage systems (i.e.,
-petabytes of data) use Ceph's block devices and Ceph's RESTful object store
-supporting APIs compatible with Amazon's S3 and OpenStack's Swift. `Inktank`_
-provides commercial support for the Ceph object store, block devices, and
-RESTful interfaces.
+petabytes of data) use Ceph's RESTful object store, which provides APIs
+compatible with Amazon's S3 and OpenStack's Swift. Many deployments also use
+the Ceph block device, including deployments of OpenStack and CloudStack.
+`Inktank`_ provides commercial support for the Ceph object store, RESTful
+interfaces, block devices and CephFS when running a single metadata server.
-The CephFS POSIX-compliant filesystem is functionally-complete and has
-been evaluated by a large community of users, but is still undergoing
-methodical QA testing. Once Ceph's filesystem passes QA muster, `Inktank`_
-will provide commercial support for CephFS in production systems.
+The CephFS POSIX-compliant filesystem is functionally complete and has been
+evaluated by a large community of users. There are production systems using
+CephFS with a single metadata server. The Ceph community is actively testing
+clusters with multiple metadata servers for quality assurance. Once CephFS
+passes QA muster when running with multiple metadata servers, `Inktank`_ will
+provide commercial support for CephFS with multiple metadata servers, too.
.. _Inktank: http://inktank.com
+
+What Kind of Hardware Does Ceph Require?
+========================================
+
+Ceph runs on commodity hardware. A typical configuration involves a
+rack mountable server with a baseboard management controller, multiple
+processors, multiple drives, and multiple NICs. There are no requirements for
+proprietary hardware. For details, see `Ceph Hardware Recommendations`_.
+
+
+What Kind of OS Does Ceph Require?
+==================================
+
+Ceph runs on Linux. Most Ceph users run a Debian/Ubuntu distribution, which you
+can install from `APT packages`_. Ceph builds `RPM packages`_ for Fedora/RHEL
+too. You can also download Ceph source `tarballs`_ and build Ceph for your
+distribution. See `Installation`_ for details.
+
+
+How Many OSDs Can I Run per Host?
+=================================
+
+Theoretically, a host can run as many OSDs as the hardware can support. Many
+vendors market storage hosts that have large numbers of drives (e.g., 36 drives)
+capable of supporting many OSDs. We don't recommend a huge number of OSDs per
+host though. Ceph was designed to distribute the load across what we call
+"failure domains." See `CRUSH Maps`_ for details.
+
+At the petabyte scale, hardware failure is an expectation, not a freak
+occurrence. Failure domains include datacenters, rooms, rows, racks, and network
+switches. In a single host, power supplies, motherboards, NICs, and drives are
+all potential points of failure.
+
+If you place a large percentage of your OSDs on a single host and that host
+fails, a large percentage of your OSDs will fail too. Having too large a
+percentage of a cluster's OSDs on a single host can cause disruptive data
+migration and long recovery times during host failures. We encourage
+diversifying the risk across failure domains, and that includes making
+reasonable tradeoffs regarding the number of OSDs per host.
+
+
+Can I Use the Same Drive for Multiple OSDs?
+===========================================
+
+Yes. **Please don't do this!** Except for initial evaluations of Ceph, we do not
+recommend running multiple OSDs on the same drive. In fact, we recommend
+**exactly** the opposite. Only run one OSD per drive. For better performance,
+run journals on a separate drive from the OSD drive, and consider using SSDs for
+journals. Run operating systems on a separate drive from any drive storing data
+for Ceph.
+
+Storage drives are a performance bottleneck. Total throughput is an important
+consideration. Sequential reads and writes are important considerations too.
+When you run multiple OSDs per drive, you split up the total throughput between
+competing OSDs, which can slow performance considerably.
+
+
+Why Do You Recommend One Drive Per OSD?
+=======================================
+
+Ceph OSD performance is one of the most common requests for assistance, and
+running an OS, a journal and an OSD on the same disk is frequently the
+impediment to high performance. Total throughput and simultaneous reads and
+writes are a major bottleneck. If you journal data, run an OS, or run multiple
+OSDs on the same drive, you will very likely see performance degrade
+significantly--especially under high loads.
+
+Running multiple OSDs on a single drive is fine for evaluation purposes. We
+even encourage that in our `5-minute quick start`_. However, just because it
+works does NOT mean that it will provide acceptable performance in an
+operational cluster.
+
+
+What Underlying Filesystem Do You Recommend?
+============================================
+
+Currently, we recommend using XFS as the underlying filesystem for OSD drives.
+We think ``btrfs`` will become the optimal filesystem. However, we still
+encounter enough issues that we do not recommend it for production systems yet.
+See `Filesystem Recommendations`_ for details.
+
+
+How Does Ceph Ensure Data Integrity Across Replicas?
+====================================================
+
+Ceph periodically scrubs placement groups to ensure that they contain the same
+information. Low-level or deep scrubbing reads the object data in each replica
+of the placement group to ensure that the data is identical across replicas.
+
+
+How Many NICs Per Host?
+=======================
+
+You can use one :abbr:`NIC (Network Interface Card)` per machine. We recommend a
+minimum of two NICs: one for a public (front-side) network and one for a cluster
+(back-side) network. When you write an object from the client to the primary
+OSD, that single write only accounts for the bandwidth consumed during one leg
+of the transaction. If you store multiple copies (usually 2-3 copies in a
+typical cluster), the primary OSD makes a write request to your secondary and
+tertiary OSDs. So your back-end network traffic can dwarf your front-end network
+traffic on writes very easily.
+
+
+What Kind of Network Throughput Do I Need?
+==========================================
+
+Network throughput requirements depend on your load. We recommend starting with
+a minimum of 1Gb Ethernet. 10Gb Ethernet is more expensive, but often comes with
+some additional advantages, including virtual LANs (VLANs). VLANs can
+dramatically reduce the cabling requirements when you run front-side, back-side
+and other special purpose networks.
+
+The number of object copies (replicas) you create is an important factor,
+because replication becomes a larger network load than the initial write itself
+when making multiple copies (e.g., triplicate). Network traffic between Ceph and
+a cloud-based system such as OpenStack or CloudStack may also become a factor.
+Some deployments even run a separate NIC for management APIs.
+
+Finally, load spikes are a factor too. Certain times of the day, week or month
+you may see load spikes. You must plan your network capacity to meet those load
+spikes in order for Ceph to perform well. This means that excess capacity may
+remain idle or unused during low load times.
+
+
+Can Ceph Support Multiple Data Centers?
+=======================================
+
+Yes, but with safeguards to ensure data safety. When a client writes data to
+Ceph the primary OSD will not acknowledge the write to the client until the
+secondary OSDs have written the replicas synchronously. See `How Ceph Scales`_
+for details.
+
+The Ceph community is working to ensure that OSD/monitor heartbeats and peering
+processes operate effectively with the additional latency that may occur when
+deploying hardware in different geographic locations. See `Monitor/OSD
+Interaction`_ for details.
+
+If your data centers have dedicated bandwidth and low latency, you can
+distribute your cluster across data centers easily. If you use a WAN over the
+Internet, you may need to configure Ceph to ensure effective peering, heartbeat
+acknowledgement and writes to ensure the cluster performs well with additional
+WAN latency.
+
+Dedicated connections are expensive, so people tend to avoid them. The Ceph
+community is exploring asynchronous writes to make distributing a cluster across
+data centers feasible without significant changes to the default settings (e.g.,
+timeouts).
+
+
+How Does Ceph Authenticate Users?
+=================================
+
+Ceph provides an authentication framework called ``cephx`` that operates in a
+manner similar to Kerberos. The principal difference is that Ceph's
+authentication system is distributed too, so that it doesn't constitute a single
+point of failure. For details, see `Ceph Authentication & Authorization`_.
+
+
+Does Ceph Authentication Provide Multi-tenancy?
+===============================================
+
+Ceph provides authentication at the `pool`_ level, which may be sufficient
+for multi-tenancy in limited cases. Ceph plans on developing authentication
+namespaces within pools in future releases, so that Ceph is well-suited for
+multi-tenancy within pools.
+
+
+Can Ceph use other Multi-tenancy Modules?
+=========================================
+
+The Bobtail release of Ceph integrates RADOS Gateway with OpenStack's Keystone.
+See `Keystone Integration`_ for details.
+
+.. _Keystone Integration: ../radosgw/config#integrating-with-openstack-keystone
+
+
+Does Ceph Enforce Quotas?
+=========================
+
+Currently, Ceph doesn't provide enforced storage quotas. The Ceph community has
+discussed enforcing user quotas within CephFS.
+
+
+Does Ceph Track Per User Usage?
+===============================
+
+The CephFS filesystem provides user-based usage tracking on a subtree basis.
+RADOS Gateway also provides detailed per-user usage tracking. RBD and the
+underlying object store do not track per user statistics. The underlying object
+store provides storage capacity utilization statistics.
+
+
+Does Ceph Provide Billing?
+==========================
+
+Ceph does not provide billing functionality at this time. Improvements to
+pool-based namespaces and pool-based usage tracking may make it feasible to use
+Ceph usage statistics with usage tracking and billing systems in the future.
+
+
+Can Ceph Export a Filesystem via NFS or Samba/CIFS?
+===================================================
+
+Ceph doesn't export CephFS via NFS or Samba. However, you can use a gateway to
+serve a CephFS filesystem to NFS or Samba clients.
+
+
+Can I Access Ceph via a Hypervisor?
+===================================
+
+Currently, the `QEMU`_ hypervisor can interact with the Ceph `block device`_.
+The :abbr:`KVM (Kernel Virtual Machine)` `module`_ and the ``librbd`` library
+allow you to use QEMU with Ceph. Most Ceph deployments use the ``librbd`` library.
+Cloud solutions like `OpenStack`_ and `CloudStack`_ interact with `libvirt`_ and
+QEMU as a means of integrating with Ceph.
+
+Ceph integrates cloud solutions via ``libvirt`` and QEMU, but the Ceph community
+is also talking about supporting the Xen hypervisor. Ceph and Citrix engineers
+have built a prototype, but they have not released a stable means of integrating
+Xen with Ceph for general use yet. Similarly, there is interest in support for
+VMWare, but there is no deep-level integration between VMWare and Ceph as yet.
+
+
+Can Block, CephFS, and Gateway Clients Share Data?
+==================================================
+
+For the most part, no. You cannot write data to Ceph using RBD and access the
+same data via CephFS, for example. You cannot write data with RADOS gateway and
+read it with RBD. However, you can write data with the RADOS Gateway
+S3-compatible API and read the same data using the RADOS Gateway
+Swift-compatible API.
+
+RBD, CephFS and the RADOS Gateway each have their own namespace. The way they
+store data differs significantly enough that it isn't possible to use the
+clients interchangeably. However, you can use all three types of clients, and
+clients you develop yourself via ``librados`` simultaneously on the same
+cluster.
+
+
+Which Ceph Clients Support Striping?
+====================================
+
+Ceph clients--RBD, CephFS and RADOS Gateway--provide striping capability. For
+details on striping, see `Striping`_.
+
+
+What Programming Languages can Interact with the Object Store?
+==============================================================
+
+Ceph's ``librados`` is written in the C programming language. There are
+interfaces for other languages, including:
+
+- C++
+- Java
+- PHP
+- Python
+- Ruby
+
+
+Can I Develop a Client With Another Language?
+=============================================
+
+Ceph does not have many native bindings for ``librados`` at this time. If you'd
+like to fork Ceph and build a wrapper to the C or C++ versions of ``librados``,
+please check out the `Ceph repository`_. You can also use other languages that
+can use the ``librados`` native bindings (e.g., you can access the C/C++ bindings
+from within Perl).
+
+
+Do Ceph Clients Run on Windows?
+===============================
+
+No. There are no immediate plans to support Windows clients at this time. However,
+you may be able to emulate a Linux environment on a Windows host. For example,
+Cygwin may make it feasible to use ``librados`` in an emulated environment.
+
+
How can I add a question to this list?
======================================
@@ -32,9 +313,31 @@ main git repository:
`https://github.com/ceph/ceph/blob/master/doc/faq.rst`_
-.. _https://github.com/ceph/ceph/blob/master/doc/faq.rst: https://github.com/ceph/ceph/blob/master/doc/faq.rst
We use Sphinx to manage our documentation, and this page is generated
from reStructuredText source. See the section on Building Ceph
Documentation for the build procedure.
+
+
+.. _Ceph Hardware Recommendations: ../install/hardware-recommendations
+.. _APT packages: ../install/debian
+.. _RPM packages: ../install/rpm
+.. _tarballs: ../install/get-tarballs
+.. _Installation: ../install
+.. _CRUSH Maps: ../rados/operations/crush-map
+.. _5-minute quick start: ../start/quick-start
+.. _How Ceph Scales: ../architecture#how-ceph-scales
+.. _Monitor/OSD Interaction: ../rados/configuration/mon-osd-interaction
+.. _Ceph Authentication & Authorization: ../rados/operations/auth-intro
+.. _Ceph repository: https://github.com/ceph/ceph
+.. _QEMU: ../rbd/qemu-rbd
+.. _block device: ../rbd
+.. _module: ../rbd/rbd-ko
+.. _libvirt: ../rbd/libvirt
+.. _OpenStack: ../rbd/rbd-openstack
+.. _CloudStack: ../rbd/rbd-cloudstack
+.. _pool: ../rados/operations/pools
+.. _Striping: ../architecture#how-ceph-clients-stripe-data
+.. _https://github.com/ceph/ceph/blob/master/doc/faq.rst: https://github.com/ceph/ceph/blob/master/doc/faq.rst
+.. _Filesystem Recommendations: ../rados/configuration/filesystem-recommendations
diff --git a/doc/install/debian.rst b/doc/install/debian.rst
index fbdebca1976..0c8db696683 100644
--- a/doc/install/debian.rst
+++ b/doc/install/debian.rst
@@ -14,7 +14,7 @@ Packages are cryptographically signed with the ``release.asc`` key.
Add our release key to your system's list of trusted keys to avoid a
security warning::
- wget -q -O- https://raw.github.com/ceph/ceph/master/keys/release.asc | sudo apt-key add -
+ wget -q -O- 'https://ceph.com/git/?p=ceph.git;a=blob_plain;f=keys/release.asc' | sudo apt-key add -
Add Release Packages
====================
@@ -85,7 +85,7 @@ Packages are cryptographically signed with the ``autobuild.asc`` key.
Add our autobuild key to your system's list of trusted keys to avoid a
security warning::
- wget -q -O- https://raw.github.com/ceph/ceph/master/keys/autobuild.asc | sudo apt-key add -
+ wget -q -O- 'https://ceph.com/git/?p=ceph.git;a=blob_plain;f=keys/autobuild.asc' | sudo apt-key add -
Add our package repository to your system's list of APT sources, but
replace ``{BRANCH}`` with the branch you'd like to use (e.g., chef-3,
diff --git a/doc/install/rpm.rst b/doc/install/rpm.rst
index b83fe5fd66a..386e39ec73a 100644
--- a/doc/install/rpm.rst
+++ b/doc/install/rpm.rst
@@ -13,7 +13,8 @@ Install Release Key
Packages are cryptographically signed with the ``release.asc`` key. Add our
release key to your system's list of trusted keys to avoid a security warning::
- sudo rpm --import https://raw.github.com/ceph/ceph/master/keys/release.asc
+ sudo rpm --import 'https://ceph.com/git/?p=ceph.git;a=blob_plain;f=keys/release.asc'
+
Add Release Packages
====================
@@ -54,7 +55,7 @@ prior to release.
Packages are cryptographically signed with the ``release.asc`` key. Add our
release key to your system's list of trusted keys to avoid a security warning::
- sudo rpm --import https://raw.github.com/ceph/ceph/master/keys/release.asc
+ sudo rpm --import 'https://ceph.com/git/?p=ceph.git;a=blob_plain;f=keys/autobuild.asc'
Packages are currently built for the CentOS-6 and Fedora 17 platforms. The
repository package installs the repository details on your local system for use
diff --git a/doc/rados/operations/auth-intro.rst b/doc/rados/operations/auth-intro.rst
index 30e015de6c5..4d8e8c3aa50 100644
--- a/doc/rados/operations/auth-intro.rst
+++ b/doc/rados/operations/auth-intro.rst
@@ -249,7 +249,7 @@ capabilities to a particular pool. This means you can have full access to some
pools, and restricted (or no) access to other pools for the same user.
For example::
- ceph-authtool -n client.foo --cap osd 'allow rwx' pool=customer-pool
+ ceph-authtool -n client.foo --cap osd 'allow rwx pool=customer-pool'
diff --git a/doc/rados/operations/authentication.rst b/doc/rados/operations/authentication.rst
index 4d68639aeb0..20d938bb734 100644
--- a/doc/rados/operations/authentication.rst
+++ b/doc/rados/operations/authentication.rst
@@ -233,7 +233,7 @@ in ``{type}`` and ``{capability}`` pairs on the command line::
For example, to create a user ``client.foo`` with access 'rw' for
daemon type 'osd' and 'r' for daemon type 'mon'::
- sudo ceph auth get-or-create-key client.foo osd rw mon r > keyring.foo
+ sudo ceph auth get-or-create-key client.foo osd 'allow rw' mon 'allow r' > keyring.foo
.. note: User names are associated to user types, which include ``client``
``osd``, ``mon``, and ``mds``. In most cases, you will be
diff --git a/doc/rados/operations/operating.rst b/doc/rados/operations/operating.rst
index be9e184fd15..d6fcd976890 100644
--- a/doc/rados/operations/operating.rst
+++ b/doc/rados/operations/operating.rst
@@ -101,7 +101,7 @@ newer Debian/Ubuntu distributions, you may use the following syntax::
For example::
- sudo service -a ceph stop
+ sudo service ceph -a stop
For older distributions, you may wish to use the ``/etc/init.d/ceph`` path::
diff --git a/doc/rados/operations/troubleshooting-osd.rst b/doc/rados/operations/troubleshooting-osd.rst
index ba5655d9e25..1dffa02bb42 100644
--- a/doc/rados/operations/troubleshooting-osd.rst
+++ b/doc/rados/operations/troubleshooting-osd.rst
@@ -298,7 +298,7 @@ long. The warning threshold defaults to 30 seconds, and is configurable
via the ``osd op complaint time`` option. When this happens, the cluster
log will receive messages like::
- osd.0 192.168.106.220:6800/18813 312 : [WRN] old request osd_op(client.5099.0:790 fatty_26485_object789 [write 0~4096] 2.5e54f643) v4 received at 2012-03-06 15:42:56.054801 currently waiting for sub ops
+ slow request 30.383883 seconds old, received at 2013-02-12 16:27:15.508374: osd_op(client.9821.0:122242 rb.0.209f.74b0dc51.000000000120 [write 921600~4096] 2.981cf6bc) v4 currently no flag points reached
Possible causes include:
@@ -307,6 +307,16 @@ Possible causes include:
* overloaded cluster (check system load, iostat, etc.)
* ceph-osd bug
+Pay particular attention to the ``currently`` part, as that will give
+some clue as to what the request is waiting for. You can further look
+at exactly what requests the slow OSD is working on, and what
+state(s) they are in, with::
+
+ ceph --admin-daemon /var/run/ceph/ceph-osd.{ID}.asok dump_ops_in_flight
+
+These are sorted oldest to newest, and the dump includes an ``age``
+indicating how long the request has been in the queue.
+
Flapping OSDs
=============
diff --git a/doc/radosgw/manual-install.rst b/doc/radosgw/manual-install.rst
index ed423d23a42..58229a6485b 100644
--- a/doc/radosgw/manual-install.rst
+++ b/doc/radosgw/manual-install.rst
@@ -4,9 +4,15 @@
.. note: If you deploy Ceph with Chef cookbooks, you may skip this section.
+Install Packages
+----------------
+
To install RADOS Gateway, you must install Apache and FastCGI first. ::
sudo apt-get update && sudo apt-get install apache2 libapache2-mod-fastcgi
+
+100-Continue Support
+--------------------
The Ceph community provides a slightly optimized version of the ``apache2``
and ``fastcgi`` packages. The material difference is that the Ceph packages are
@@ -30,7 +36,16 @@ You may also clone Ceph's Apache and FastCGI git repositories::
.. _FastCGI Oneric: http://gitbuilder.ceph.com/libapache-mod-fastcgi-deb-oneiric-x86_64-basic/
.. _FastCGI Precise: http://gitbuilder.ceph.com/libapache-mod-fastcgi-deb-precise-x86_64-basic/
.. _RFC 2616, Section 8: http://www.w3.org/Protocols/rfc2616/rfc2616-sec8.html
-
+
+.. important:: If you do NOT use a modified fastcgi as described above,
+ you should disable 100-Continue support by adding the following to
+ your ``ceph.conf``::
+
+ rgw print continue = false
+
+Apache Configuration
+--------------------
+
Enable the URL rewrite modules for Apache and FastCGI. For example::
sudo a2enmod rewrite
@@ -52,7 +67,7 @@ Then, install RADOS Gateway. For example::
Enable SSL
-==========
+----------
Some REST clients use HTTPS by default. So you should consider enabling SSL
for Apache on the server machine. ::
diff --git a/doc/rbd/libvirt.rst b/doc/rbd/libvirt.rst
index e0228f08388..69cc31c20b4 100644
--- a/doc/rbd/libvirt.rst
+++ b/doc/rbd/libvirt.rst
@@ -14,15 +14,44 @@ to many different hypervisors, including:
- VirtualBox
- etc.
-Ceph RADOS block devices support QEMU/KVM, which means you can use RADOS
-block devices with software that interfaces with ``libvirt``. For example,
-OpenStack's integration to Ceph uses ``libvirt`` to interact with QEMU/KVM,
-and QEMU/KVM interacts with RADOS block devices via ``librbd``.
+Ceph block devices support QEMU/KVM. You can use Ceph block devices with
+software that interfaces with ``libvirt``. The following stack diagram
+illustrates how ``libvirt`` and QEMU use Ceph block devices via ``librbd``.
+
+
+.. ditaa:: +---------------------------------------------------+
+ | libvirt |
+ +------------------------+--------------------------+
+ |
+ | configures
+ v
+ +---------------------------------------------------+
+ | QEMU |
+ +---------------------------------------------------+
+ | librbd |
+ +------------------------+-+------------------------+
+ | OSDs | | Monitors |
+ +------------------------+ +------------------------+
+
+
+The most common ``libvirt`` use case involves providing Ceph block devices to
+cloud solutions like OpenStack or CloudStack. The cloud solution uses
+``libvirt`` to interact with QEMU/KVM, and QEMU/KVM interacts with Ceph block
+devices via ``librbd``. See `Block Devices and OpenStack`_ and `Block Devices
+and CloudStack`_ for details.
+
+You can also use Ceph block devices with ``libvirt``, ``virsh`` and the
+``libvirt`` API. See `libvirt Virtualization API`_ for details.
+
+Prerequisites
+=============
+
+- `Install`_ and `configure`_ a Ceph cluster
+- `Install and configure`_ QEMU/KVM
-See `libvirt Virtualization API`_ for details.
Installing ``libvirt`` on Ubuntu 12.04 Precise
-----------------------------------------------
+==============================================
``libvirt`` packages are incorporated into the Ubuntu 12.04 precise
distribution. To install ``libvirt`` on precise, execute the following::
@@ -31,12 +60,12 @@ distribution. To install ``libvirt`` on precise, execute the following::
Installing ``libvirt`` on Earlier Versions of Ubuntu
-----------------------------------------------------
+====================================================
-For Ubuntu distributions 11.10 oneiric and earlier, you must build
-``libvirt`` from source. Clone the ``libvirt`` repository, and use
-`AutoGen`_ to generate the build. Then execute ``make`` and
-``make install`` to complete the installation. For example::
+For Ubuntu distributions 11.10 oneiric and earlier, you must build ``libvirt``
+from source. Clone the ``libvirt`` repository, and use `AutoGen`_ to generate
+the build. Then, execute ``make`` and ``make install`` to complete the
+installation. For example::
git clone git://libvirt.org/libvirt.git
cd libvirt
@@ -46,6 +75,262 @@ For Ubuntu distributions 11.10 oneiric and earlier, you must build
See `libvirt Installation`_ for details.
+
+Using Ceph with Virtual Machines
+================================
+
+To create VMs that use Ceph block devices, use the procedures in the following
+sections. In the exemplary embodiment, we've used ``libvirt-pool`` for the pool
+name, ``client.libvirt`` for the user name, and ``new-libvirt-image`` for the
+image name. You may use any value you like, but ensure you replace those values
+when executing commands in the subsequent procedures.
+
+
+Configuring Ceph
+----------------
+
+To configure Ceph for use with ``libvirt``, perform the following steps:
+
+#. `Create a pool`_ (or use the default). The following example uses the
+ pool name ``libvirt-pool`` with 128 placement groups. ::
+
+ ceph osd pool create libvirt-pool 128 128
+
+ Verify the pool exists. ::
+
+ ceph osd lspools
+
+#. `Create a Ceph Name`_ (or use ``client.admin`` for version 0.9.7 and earlier).
+ The following example uses the Ceph name ``client.libvirt`` and references
+ ``libvirt-pool``. ::
+
+ ceph auth get-or-create client.libvirt mon 'allow r' osd 'allow class-read object_prefix rbd_children, allow rwx pool=libvirt-pool'
+
+ Verify the name exists. ::
+
+ ceph auth list
+
+ **NOTE**: ``libvirt`` will access Ceph using the ID ``libvirt``,
+ not the Ceph name ``client.libvirt``. See `Cephx Commandline`_ for a detailed
+ explanation of the difference between ID and name.
+
+#. Use QEMU to `create an image`_ in your RBD pool.
+ The following example uses the image name ``new-libvirt-image``
+ and references ``libvirt-pool``. ::
+
+ qemu-img create -f rbd rbd:libvirt-pool/new-libvirt-image 2G
+
+ Verify the image exists. ::
+
+ rbd -p libvirt-pool ls
+
+ **NOTE:** You can also use `rbd create`_ to create an image, but we
+ recommend ensuring that QEMU is working properly.
+
+
+
+Preparing the VM Manager
+------------------------
+
+You may use ``libvirt`` without a VM manager, but you may find it simpler to
+create your first domain with ``virt-manager``.
+
+#. Install a virtual machine manager. See `KVM/VirtManager`_ for details. ::
+
+ sudo apt-get install virt-manager
+
+#. Download an OS image (if necessary).
+
+#. Launch the virtual machine manager. ::
+
+ sudo virt-manager
+
+
+
+Creating a VM
+-------------
+
+To create a VM with ``virt-manager``, perform the following steps:
+
+#. Press the **Create New Virtual Machine** button.
+
+#. Name the new virtual machine domain. In the exemplary embodiment, we
+ use the name ``libvirt-virtual-machine``. You may use any name you wish,
+ but ensure you replace ``libvirt-virtual-machine`` with the name you
+ choose in subsequent commandline and configuration examples. ::
+
+ libvirt-virtual-machine
+
+#. Import the image. ::
+
+ /path/to/image/recent-linux.img
+
+ **NOTE:** Import a recent image. Some older images may not rescan for
+ virtual devices properly.
+
+#. Configure and start the VM.
+
+#. You may use ``virsh list`` to verify the VM domain exists. ::
+
+ sudo virsh list
+
+#. Login to the VM (root/root)
+
+#. Stop the VM before configuring it for use with Ceph.
+
+
+Configuring the VM
+------------------
+
+When configuring the VM for use with Ceph, it is important to use ``virsh``
+where appropriate. Additionally, ``virsh`` commands often require root
+privileges (i.e., ``sudo``) and will not return appropriate results or notify
+you that root privileges are required. For a reference of ``virsh``
+commands, refer to `Virsh Command Reference`_.
+
+
+#. Open the configuration file with ``virsh edit``. ::
+
+ sudo virsh edit {vm-domain-name}
+
+ Under ``<devices>`` there should be a ``<disk>`` entry. ::
+
+ <devices>
+ <emulator>/usr/bin/kvm</emulator>
+ <disk type='file' device='disk'>
+ <driver name='qemu' type='raw'/>
+ <source file='/path/to/image/recent-linux.img'/>
+ <target dev='hda' bus='ide'/>
+ <address type='drive' controller='0' bus='0' unit='0'/>
+ </disk>
+
+
+ Replace ``/path/to/image/recent-linux.img`` with the path to the OS image.
+
+ **IMPORTANT:** Use ``sudo virsh edit`` instead of a text editor. If you edit
+ the configuration file under ``/etc/libvirt/qemu`` with a text editor,
+ ``libvirt`` may not recognize the change. If there is a discrepancy between
+ the contents of the XML file under ``/etc/libvirt/qemu`` and the result of
+ ``sudo virsh dumpxml {vm-domain-name}``, then your VM may not work
+ properly.
+
+
+#. Add the Ceph RBD image you created as a ``<disk>`` entry. ::
+
+ <disk type='network' device='disk'>
+ <source protocol='rbd' name='libvirt-pool/new-libvirt-image'>
+ <host name='{monitor-host}' port='6789'/>
+ </source>
+ <target dev='hdb' bus='ide'/>
+ </disk>
+
+ Replace ``{monitor-host}`` with the name of your host, and replace the
+ pool and/or image name as necessary. You may add multiple ``<host>``
+ entries for your Ceph monitors. The ``dev`` attribute is the logical
+ device name that will appear under the ``/dev`` directory of your
+ VM. The optional ``bus`` attribute indicates the type of disk device to
+ emulate. The valid settings are driver specific (e.g., "ide", "scsi",
+ "virtio", "xen", "usb" or "sata").
+
+ See `Disks`_ for details of the ``<disk>`` element, and its child elements
+ and attributes.
+
+#. Save the file.
+
+#. If you are using `Ceph Authentication`_, you must generate a secret. ::
+
+ cat > secret.xml <<EOF
+ <secret ephemeral='no' private='no'>
+ <usage type='ceph'>
+ <name>client.libvirt secret</name>
+ </usage>
+ </secret>
+ EOF
+
+#. Define the secret. ::
+
+ sudo virsh secret-define --file secret.xml
+ <uuid of secret is output here>
+
+#. Get the ``client.libvirt`` key and save the key string to a file. ::
+
+ sudo ceph auth list
+ vim client.libvirt.key
+
+#. Set the UUID of the secret. ::
+
+ sudo virsh secret-set-value --secret {uuid of secret} --base64 $(cat client.libvirt.key) && rm client.libvirt.key secret.xml
+
+ You must also set the secret manually by adding the following ``<auth>``
+ entry to the ``<disk>`` element you entered earlier (replacing the
+ ``uuid`` value with the result from the command line example above). ::
+
+ sudo virsh edit {vm-domain-name}
+
+ Then, add ``<auth></auth>`` element to the domain configuration file::
+
+ ...
+ </source>
+ <auth username='libvirt'>
+ <secret type='ceph' uuid='9ec59067-fdbc-a6c0-03ff-df165c0587b8'/>
+ </auth>
+ <target ...
+
+
+ **NOTE:** The exemplary ID is ``libvirt``, not the Ceph name
+ ``client.libvirt`` as generated at step 2 of `Configuring Ceph`_. Ensure
+ you use the ID component of the Ceph name you generated. If for some reason
+ you need to regenerate the secret, you will have to execute
+ ``sudo virsh secret-undefine {uuid}`` before executing
+ ``sudo virsh secret-set-value`` again.
+
+
+Summary
+-------
+
+Once you have configured the VM for use with Ceph, you can start the VM.
+To verify that the VM and Ceph are communicating, you may perform the
+following procedures.
+
+
+#. Check to see if Ceph is running::
+
+ ceph health
+
+#. Check to see if the VM is running. ::
+
+ sudo virsh list
+
+#. Check to see if the VM is communicating with Ceph. Replace
+ ``{vm-domain-name}`` with the name of your VM domain::
+
+ sudo virsh qemu-monitor-command --hmp {vm-domain-name} 'info block'
+
+#. Check to see if the device from ``<target dev='hdb' bus='ide'/>`` appears
+ under ``/dev`` or under ``/proc/partitions``. ::
+
+ ls /dev
+ cat /proc/partitions
+
+If everything looks okay, you may begin using the Ceph block device
+within your VM.
+
+
+
.. _AutoGen: http://www.gnu.org/software/autogen/
.. _libvirt Installation: http://www.libvirt.org/compiling.html
-.. _libvirt Virtualization API: http://www.libvirt.org \ No newline at end of file
+.. _libvirt Virtualization API: http://www.libvirt.org
+.. _Install: ../../install
+.. _configure: ../../rados/configuration
+.. _Install and configure: ../qemu-rbd
+.. _Block Devices and OpenStack: ../rbd-openstack
+.. _Block Devices and CloudStack: ../rbd-cloudstack
+.. _Create a pool: ../../rados/operations/pools#create-a-pool
+.. _Create a Ceph Name: ../../rados/operations/authentication#add-a-key
+.. _create an image: ../qemu-rbd#creating-images-with-qemu
+.. _Virsh Command Reference: http://www.libvirt.org/virshcmdref.html
+.. _KVM/VirtManager: https://help.ubuntu.com/community/KVM/VirtManager
+.. _Ceph Authentication: ../../rados/operations/auth-intro
+.. _Disks: http://www.libvirt.org/formatdomain.html#elementsDisks
+.. _rbd create: ../rados-rbd-cmds#creating-a-block-device-image
+.. _Cephx Commandline: ../../rados/operations/authentication#cephx-commandline-options \ No newline at end of file
diff --git a/doc/release-notes.rst b/doc/release-notes.rst
index a46eea70cd5..d7840fd645d 100644
--- a/doc/release-notes.rst
+++ b/doc/release-notes.rst
@@ -2,6 +2,50 @@
Release Notes
===============
+v0.56.3 "bobtail"
+-----------------
+
+This release has several bug fixes surrounding OSD stability. Most
+significantly, an issue with OSDs being unresponsive shortly after
+startup (and occasionally crashing due to an internal heartbeat check)
+is resolved. Please upgrade.
+
+Upgrading
+~~~~~~~~~
+
+* A bug was fixed in which the OSDMap epoch for PGs without any IO
+ requests was not recorded. If there are pools in the cluster that
+ are completely idle (for example, the ``data`` and ``metadata``
+ pools normally used by CephFS), and a large number of OSDMap epochs
+ have elapsed since the ``ceph-osd`` daemon was last restarted, those
+ maps will get reprocessed when the daemon restarts. This process
+ can take a while if there are a lot of maps. A workaround is to
+ 'touch' any idle pools with IO prior to restarting the daemons after
+ packages are upgraded::
+
+ rados bench 10 write -t 1 -b 4096 -p {POOLNAME}
+
+ This will typically generate enough IO to touch every PG in the pool
+ without generating significant cluster load, and also cleans up any
+ temporary objects it creates.
+
+Notable changes
+~~~~~~~~~~~~~~~
+
+* osd: flush peering work queue prior to start
+* osd: persist osdmap epoch for idle PGs
+* osd: fix and simplify connection handling for heartbeats
+* osd: avoid crash on invalid admin command
+* mon: fix rare races with monitor elections and commands
+* mon: enforce that OSD reweights be between 0 and 1 (NOTE: not CRUSH weights)
+* mon: approximate client, recovery bandwidth logging
+* radosgw: fixed some XML formatting to conform to Swift API inconsistency
+* radosgw: fix usage accounting bug; add repair tool
+* radosgw: make fallback URI configurable (necessary on some web servers)
+* librbd: fix handling for interrupted 'unprotect' operations
+* mds, ceph-fuse: allow file and directory layouts to be modified via virtual xattrs
+
+
v0.56.2 "bobtail"
-----------------
diff --git a/doc/start/quick-rgw.rst b/doc/start/quick-rgw.rst
index 8943b2716f3..2c5ef8a2f7b 100644
--- a/doc/start/quick-rgw.rst
+++ b/doc/start/quick-rgw.rst
@@ -249,10 +249,9 @@ Gateway via the Swift-compatible API.
RGW's ``user:subuser`` tuple maps to the ``tenant:user`` tuple expected by Swift.
-.. important:: RGW's Swift authentication service only supports
- built-in Swift authentication (``-V 1.0``) at this point. There is
- currently no way to make RGW authenticate users via OpenStack
- Identity Service (Keystone).
+.. note:: RGW's Swift authentication service only supports
+ built-in Swift authentication (``-V 1.0``) at this point. See
+ `RGW Configuration`_ for Keystone integration details.
Enable SSL
@@ -276,3 +275,4 @@ Then, restart Apache. ::
.. _Create rgw.conf: ../../radosgw/config/index.html#create-rgw-conf
.. _5-minute Quick Start: ../quick-start
.. _RADOS Gateway Manual Install: ../../radosgw/manual-install
+.. _RGW Configuration: ../../radosgw/config \ No newline at end of file
diff --git a/doc/start/quick-start.rst b/doc/start/quick-start.rst
index 33d7c844103..2fb29f99402 100644
--- a/doc/start/quick-start.rst
+++ b/doc/start/quick-start.rst
@@ -41,7 +41,7 @@ To get the latest Ceph packages, add a release key to :abbr:`APT (Advanced
Package Tool)`, add a source location to the ``/etc/apt/sources.list`` on your
Ceph server and client machines, update your systems and install Ceph. ::
- wget -q -O- https://raw.github.com/ceph/ceph/master/keys/release.asc | sudo apt-key add -
+ wget -q -O- 'https://ceph.com/git/?p=ceph.git;a=blob_plain;f=keys/release.asc' | sudo apt-key add -
echo deb http://ceph.com/debian/ $(lsb_release -sc) main | sudo tee /etc/apt/sources.list.d/ceph.list
sudo apt-get update && sudo apt-get install ceph
diff --git a/keys/autobuild.asc b/keys/autobuild.asc
index 2a1d17dc9ef..e43bd6c6e4d 100644
--- a/keys/autobuild.asc
+++ b/keys/autobuild.asc
@@ -1,5 +1,5 @@
-----BEGIN PGP PUBLIC KEY BLOCK-----
-Version: GnuPG v1.4.9 (GNU/Linux)
+Version: GnuPG v1.4.10 (GNU/Linux)
mQGiBE1Rr28RBADCxdpLV3ea9ocpS/1+UCvHqD5xjmlw/9dmji4qrUX0+IhPMNuA
GBBt2CRaR7ygMF5S0NFXooegph0/+NT0KisLIuhUI3gde4SWb5jsb8hpGUse9MC5
@@ -11,31 +11,31 @@ cF30A/9GotDdnMlqh8bFBOCMuxfRow7H8RpfL0fX7VHA0knAZEDk2rNFeebL5QKH
GNJm9Wa6JSVj1NUIaz4LHyravqXi4MXzlUqauhLHw1iG+qwZlPM04z+1Dj6A+2Hr
b5UxI/I+EzmO5OYa38YWOqybNVBH0wO+sMCpdBq0LABa8X29LbRPQ2VwaCBhdXRv
bWF0ZWQgcGFja2FnZSBidWlsZCAoQ2VwaCBhdXRvbWF0ZWQgcGFja2FnZSBidWls
-ZCkgPHNhZ2VAbmV3ZHJlYW0ubmV0PohmBBMRAgAmBQJNUa9vAhsDBQkDwmcABgsJ
-CAcDAgQVAggDBBYCAwECHgECF4AACgkQbq6uIgPDlRpR0QCfZnYE8vEDX4JL3sZj
-5LvMsXruULIAnjHBAYvdlu5iMowoEMQDJlNNdscxuQQNBE1Rr28QEACKG04kxGY1
-cwGoInHVP6z1+8oqGiaiYWFflYRtSiwoUVtl30T1sMOSzoEvmauc+rmBBfsyaBb8
-DLDUIgGKv1FCOY/tfqnOyQXotPjgaLeCtK5A5Z5D212wbskf5fRHAxiychwKURiE
-eesRa7EWrF6ohFxOTy9NOlFi7ctusShw6Q2kUtN7bQCX9hJdYs7PYQXvCXvW8DNt
-7IitF7MpgMHNcj0wik6p38I4s7pqK6mqP4AXVVSWbJKr/LSz8bI8KhWRAT7erVAZ
-f6FElR2xZVr3c4zsE2HFpnZTsM5y/nj8fUkgKGl8OfBuUoh+MCVfnPmE6sgWfDTK
-kwWtUcmL6V9UQ1INUJ3sk+XBY9SMNbOn04su9FjQyNEMI/3VK7yuyKBRAN7IIVgP
-2ch499m6+YFV9ZkG3JSTovNiqSpQouW7YPkS+8mxlPo03LQcU5bHeacBl0T8Xjlv
-qu6q279EliHul4huKL0+myPN4DtmOTh/kwgSy3BGCBdS+wfAJSZcuKI7pk7pHGCd
-UjNMHQZmPFbwzp33bVLd16gnAx0OW5DOn6l0VfgIQNSJ2rn7WZ5jdyg/Flp2VlWV
-tAHFLzkCa+LvQ5twSuzrV/VipSr3xz3pTDLY+ZxDztvrgA6AST8+sdq6uQTYjwUQ
-V0wzanvp9hkC5eqRY6YlzcgMkWFv8DCIEwADBQ//ZQaeVmG6T5vyfXf2JrCipmI4
-MAdO+ezEtWE82wgixlCvvm26UmUejCYgtD6DmwY/7/bIjvJDhUwP0+hAHHOpR62g
-ncoMtbMryHpm3FvYH58JNk5gx8ZA322WEc2GCRCQzrMQoMKBcpZY/703GpQ4l3RZ
-7/25gq7ANohV5zeddFQftc05PMBBJLU3U+lrnahJS1WaOXNQzS6oVj9jNda1jkgc
-Qni6QssSIMT6rAPsVbGJhe9mxr2VWdQ90QlubpszIeSJuqqJxLwqH8XHXZmQOYxm
-yVP9a3pFqWDmsNxDA8ttYnMIc+nUAgCDJ84ScwQ1GvoCUD1b1cFNzvvhEHsNb4D/
-XbdrFcFGwEkeyivUsojdq2YnGjYSgauqyNWbeEgBrWzUe5USYysmziL/KAubcUjI
-beRGxyPS6iQ2kbvfEJJPgocWTfLs5j61FObO+MVlj+PEmxWbcsIRv/pnG2V2FPJ8
-evhzgvp7cG9imZPM6dWHzc/ZFdi3Bcs51RtStsvPqXv4icKIi+01h1MLHNBqwuUk
-IiiK7ooMlvnp+DiEsVSuYYKBdGTi+4+nduuYL2g8CTNJKZuC46dY7EcE3lRYZlxl
-7dwN3jfLPRlnNscs34dwhZa+b70Flia0U1DNF4jrIFFBSHD3TqMg0Z6kxp1Tfxpe
-GOLOqnBWrr0GKehu9CGITwQYEQIADwUCTVGvbwIbDAUJA8JnAAAKCRBurq4iA8OV
-GqKjAJ9QA7mNQs0Rko5VGYA+xjPokf0yVACfQMEFVHxT/k9+awAbBFLR3D0jjJ4=
-=PYuQ
+ZCkgPHNhZ2VAbmV3ZHJlYW0ubmV0PohgBBMRAgAgAhsDBgsJCAcDAgQVAggDBBYC
+AwECHgECF4AFAlEUm1YACgkQbq6uIgPDlRqTUACeMqJ+vwatwb+y/KWeNfmgtQ8+
+kDwAn0MHwY42Wmb7FA891j88enooCdxRuQQNBE1Rr28QEACKG04kxGY1cwGoInHV
+P6z1+8oqGiaiYWFflYRtSiwoUVtl30T1sMOSzoEvmauc+rmBBfsyaBb8DLDUIgGK
+v1FCOY/tfqnOyQXotPjgaLeCtK5A5Z5D212wbskf5fRHAxiychwKURiEeesRa7EW
+rF6ohFxOTy9NOlFi7ctusShw6Q2kUtN7bQCX9hJdYs7PYQXvCXvW8DNt7IitF7Mp
+gMHNcj0wik6p38I4s7pqK6mqP4AXVVSWbJKr/LSz8bI8KhWRAT7erVAZf6FElR2x
+ZVr3c4zsE2HFpnZTsM5y/nj8fUkgKGl8OfBuUoh+MCVfnPmE6sgWfDTKkwWtUcmL
+6V9UQ1INUJ3sk+XBY9SMNbOn04su9FjQyNEMI/3VK7yuyKBRAN7IIVgP2ch499m6
++YFV9ZkG3JSTovNiqSpQouW7YPkS+8mxlPo03LQcU5bHeacBl0T8Xjlvqu6q279E
+liHul4huKL0+myPN4DtmOTh/kwgSy3BGCBdS+wfAJSZcuKI7pk7pHGCdUjNMHQZm
+PFbwzp33bVLd16gnAx0OW5DOn6l0VfgIQNSJ2rn7WZ5jdyg/Flp2VlWVtAHFLzkC
+a+LvQ5twSuzrV/VipSr3xz3pTDLY+ZxDztvrgA6AST8+sdq6uQTYjwUQV0wzanvp
+9hkC5eqRY6YlzcgMkWFv8DCIEwADBQ//ZQaeVmG6T5vyfXf2JrCipmI4MAdO+ezE
+tWE82wgixlCvvm26UmUejCYgtD6DmwY/7/bIjvJDhUwP0+hAHHOpR62gncoMtbMr
+yHpm3FvYH58JNk5gx8ZA322WEc2GCRCQzrMQoMKBcpZY/703GpQ4l3RZ7/25gq7A
+NohV5zeddFQftc05PMBBJLU3U+lrnahJS1WaOXNQzS6oVj9jNda1jkgcQni6QssS
+IMT6rAPsVbGJhe9mxr2VWdQ90QlubpszIeSJuqqJxLwqH8XHXZmQOYxmyVP9a3pF
+qWDmsNxDA8ttYnMIc+nUAgCDJ84ScwQ1GvoCUD1b1cFNzvvhEHsNb4D/XbdrFcFG
+wEkeyivUsojdq2YnGjYSgauqyNWbeEgBrWzUe5USYysmziL/KAubcUjIbeRGxyPS
+6iQ2kbvfEJJPgocWTfLs5j61FObO+MVlj+PEmxWbcsIRv/pnG2V2FPJ8evhzgvp7
+cG9imZPM6dWHzc/ZFdi3Bcs51RtStsvPqXv4icKIi+01h1MLHNBqwuUkIiiK7ooM
+lvnp+DiEsVSuYYKBdGTi+4+nduuYL2g8CTNJKZuC46dY7EcE3lRYZlxl7dwN3jfL
+PRlnNscs34dwhZa+b70Flia0U1DNF4jrIFFBSHD3TqMg0Z6kxp1TfxpeGOLOqnBW
+rr0GKehu9CGISQQYEQIACQIbDAUCURSbegAKCRBurq4iA8OVGv9TAJ9EeXVrRS3p
+PZkT1R21FszUc9LvmgCeMduh5IPGFWSx9MjUc7/j1QKYm7g=
+=per8
-----END PGP PUBLIC KEY BLOCK-----
diff --git a/man/.gitignore b/man/.gitignore
new file mode 100644
index 00000000000..5fc607b9e2f
--- /dev/null
+++ b/man/.gitignore
@@ -0,0 +1 @@
+/Makefile
diff --git a/qa/qa_scripts/RbdLib.pm b/qa/qa_scripts/RbdLib.pm
index d0749f49ec0..f203b8ac084 100755
--- a/qa/qa_scripts/RbdLib.pm
+++ b/qa/qa_scripts/RbdLib.pm
@@ -9,7 +9,7 @@ package RbdLib;
use Cwd;
use Exporter;
@ISA = 'Exporter';
-@EXPORT_OK = qw(perform_action create_image resize_image rename_image copy_image list_image info_image export_image import_image remove_image create_snapshots rollback_snapshots purge_snapshots list_snapshots remove_snapshot rbd_map rbd_unmap rbd_showmapped display_result _pre_clean_up _post_clean_up _create_rados_pool display_ceph_os_info $RADOS_LS $RADOS_MKPOOL $RADOS_RMPOOL $RBD_CREATE $RBD_RESIZE $RBD_INFO $RBD_REMOVE $RBD_RENAME $RBD_MV $RBD_LS $RBD_LIST $RBD_CLONE $RBD_EXPORT $RBD_IMPORT $RBD_CP $RBD_COPY $SNAP_CREATE $SNAP_LS $SNAP_LIST $SNAP_ROLLBACK $SNAP_PURGE $SNAP_REMOVE $POOL_RM_SUCCESS $POOL_MK_SUCCESS $RBD_EXISTS_ERR $RBD_WATCH $RBD_MAP $RBD_UNMAP $RBD_SHOWMAPPED get_command_output verify_action debug_msg tpass tfail log_results display_func_result $CLI_FLAG);
+@EXPORT_OK = qw(perform_action create_image resize_image rename_image copy_image list_image info_image export_image import_image remove_image create_snapshots protect_snapshot clone_image unprotect_snapshot rollback_snapshots purge_snapshots list_snapshots remove_snapshot rbd_map rbd_unmap rbd_showmapped display_result _pre_clean_up _post_clean_up _create_rados_pool display_ceph_os_info $RADOS_LS $RADOS_MKPOOL $RADOS_RMPOOL $RBD_CREATE $RBD_RESIZE $RBD_INFO $RBD_REMOVE $RBD_RENAME $RBD_MV $RBD_LS $RBD_LIST $RBD_CLONE $RBD_EXPORT $RBD_IMPORT $RBD_CP $RBD_COPY $SNAP_CREATE $SNAP_LS $SNAP_LIST $SNAP_ROLLBACK $SNAP_PURGE $SNAP_REMOVE $RBD_CHILDREN $RBD_FLATTEN $POOL_RM_SUCCESS $POOL_MK_SUCCESS $RBD_EXISTS_ERR $RBD_WATCH $RBD_MAP $RBD_UNMAP $RBD_SHOWMAPPED $RBD_FLATTEN $SNAP_PROTECT $SNAP_UNPROTECT get_command_output verify_action debug_msg tpass tfail log_results display_func_result $CLI_FLAG );
use Pod::Usage();
use Getopt::Long();
@@ -50,8 +50,16 @@ our $RBD_MAP = "sudo rbd map";
our $RBD_UNMAP = "sudo rbd unmap";
our $RBD_SHOWMAPPED = "rbd showmapped";
our $RADOS_LS = "rados ls";
+our $SNAP_PROTECT = "rbd snap protect";
+our $SNAP_UNPROTECT = "rbd snap unprotect";
+our $RBD_CHILDREN = "rbd children";
+our $RBD_FLATTEN = "rbd flatten";
+
#====Error messages========================
+our $RBD_CREATE_ERR = "size must be >= 0";
+our $RBD_EXTRA_ERR = "extraneous parameter";
+our $RBD_REQ_ERR = "expected integer";
our $RBD_RM_ERROR = "image name was not specified";
our $SNAP_LS_ERROR = "snap name was not specified";
our $SNAP_RM_ERROR = "remove failed";
@@ -69,7 +77,20 @@ our $RBD_IMP_ERR = "import failed";
our $RBD_MAP_ERR = "add failed";
our $RBD_UNMAP_ERR = "remove failed";
our $RBD_INFO_SNAP_ERR = "error setting snapshot context";
-
+our $SNAP_PROTECT_ERR = "Device or resource busy";
+our $SNAP_PROTECT_RM_ERR = "protected from removal";
+our $SNAP_PROTECT_ERR1 = "No such file or directory";
+our $SNAP_UNPROT_ERR = "snap_unprotect: image must support layering";
+our $SNAP_UNPROT_ERR1 = "snap_unprotect: can't unprotect";
+#our $SNAP_UNPROTECT_ERR - bug # 4045
+our $SNAP_PROT_ERR = "snap_protect: image must support layering";
+our $CLONE_UNPROTECT_ERR = "parent snapshot must be protected";
+our $CLONE_ARG_ERR = "destination image name was not specified";
+our $CLONE_PARENT_ERR = "error opening parent image";
+our $CLONE_PF_ERR = "parent image must be in new format";
+our $FLATTEN_ERR = "librbd: parent snapshot must be protected";
+our $FLATTEN_IMG_ERR = "librbd: image has no parent";
+
#=======Success messages=======================
our $POOL_MK_SUCCESS = "successfully created pool";
@@ -81,6 +102,7 @@ our $RBD_EXP_SUCCESS = "Exporting image: 100%";
our $RBD_IMP_SUCCESS = "Importing image: 100%";
our $SNAP_ROLLBACK_SUCCESS = "Rolling back to snapshot: 100%";
our $SNAP_PURGE_SUCCESS = "Removing all snapshots: 100%";
+our $RBD_FLATTEN_SUCCESS = "Image flatten: 100% complete";
#===========Variables used in the script========
@@ -367,6 +389,18 @@ sub validate_cmd_output {
elsif ( ( $act =~ /$RBD_MAP/ ) && ( $cmd_op !~ /./ ) ) {
pass("$act $args passed");
}
+ elsif ( ( $act =~ /$SNAP_PROTECT/ ) && ( $cmd_op !~ /./ ) ) {
+ pass("$act $args passed");
+ }
+ elsif ( ( $act =~ /$SNAP_UNPROTECT/ ) && ( $cmd_op !~ /./ ) ) {
+ pass("$act $args passed");
+ }
+ elsif ( ( $act =~ /$RBD_CLONE/ ) && ( $cmd_op !~ /./ ) ) {
+ pass("$act $args passed");
+ }
+ elsif ( ( $act =~ /$RBD_FLATTEN/ ) && ( $cmd_op =~ /$RBD_FLATTEN_SUCCESS/ ) ) {
+ pass("$act $args passed");
+ }
elsif ( ( $act =~ /$RBD_UNMAP/ ) && ( $cmd_op !~ /$RBD_UNMAP_ERR/ ) ) {
pass("$act $args passed");
}
@@ -402,6 +436,21 @@ sub validate_cmd_output {
|| ( $cmd_op =~ /$SNAP_ROLLBACK_ERR/ )
|| ( $cmd_op =~ /$RBD_MAP_ERR/ )
|| ( $cmd_op =~ /$RBD_UNMAP_ERR/ )
+ || ( $cmd_op =~ /$RBD_CREATE_ERR/ )
+ || ( $cmd_op =~ /$RBD_EXTRA_ERR/ )
+ || ( $cmd_op =~ /$RBD_REQ_ERR/ )
+ || ( $cmd_op =~ /$SNAP_PROTECT_ERR/ )
+ || ( $cmd_op =~ /$SNAP_PROTECT_ERR1/ )
+ || ( $cmd_op =~ /$SNAP_PROTECT_RM_ERR/ )
+ || ( $cmd_op =~ /$SNAP_PROT_ERR/ )
+ || ( $cmd_op =~ /$SNAP_UNPROT_ERR/ )
+ || ( $cmd_op =~ /$SNAP_UNPROT_ERR1/ )
+ || ( $cmd_op =~ /$CLONE_UNPROTECT_ERR/ )
+ || ( $cmd_op =~ /$CLONE_ARG_ERR/ )
+ || ( $cmd_op =~ /$CLONE_PARENT_ERR/ )
+ || ( $cmd_op =~ /$CLONE_PF_ERR/ )
+ || ( $cmd_op =~ /$FLATTEN_ERR/ )
+ || ( $cmd_op =~ /$FLATTEN_IMG_ERR/ )
|| ( $cmd_op =~ /$RBD_INFO_SNAP_ERR/ ) )
)
{
@@ -472,8 +521,9 @@ sub ceph_os_info
sub display_ceph_os_info
{
my ($vceph, $vos) = ceph_os_info();
- my $msg = "The Tests are running on";
- debug_msg ( "$msg\n$vos$vceph",1 );
+ my $dat = get_command_output ( "date" );
+ my $msg = "The Tests were executed on $dat";
+ debug_msg ( "$msg\n$vos$vceph\n",1 );
open( TC, '>>log.txt' );
print TC "[Log] $vceph\n";
close (TC);
diff --git a/qa/qa_scripts/rbd_cli_tests.pl b/qa/qa_scripts/rbd_cli_tests.pl
index dcfc6f19560..4c8b5a9afa5 100755
--- a/qa/qa_scripts/rbd_cli_tests.pl
+++ b/qa/qa_scripts/rbd_cli_tests.pl
@@ -42,7 +42,7 @@ For Example,for "nova" user, 'export CEPH_ARGS="--keyring /etc/ceph/ceph.keyring
=cut
use Cwd;
-use RbdLib qw(perform_action create_image resize_image rename_image copy_image list_image info_image export_image import_image remove_image create_snapshots rollback_snapshots purge_snapshots list_snapshots remove_snapshot rbd_map rbd_unmap rbd_showmapped display_result _pre_clean_up _post_clean_up _create_rados_pool display_ceph_os_info $RADOS_MKPOOL $RADOS_RMPOOL $RBD_CREATE $RBD_RESIZE $RBD_INFO $RBD_REMOVE $RBD_RENAME $RBD_MV $RBD_LS $RBD_LIST $RBD_CLONE $RBD_EXPORT $RBD_IMPORT $RBD_CP $RBD_COPY $SNAP_CREATE $SNAP_LS $SNAP_LIST $SNAP_ROLLBACK $SNAP_PURGE $SNAP_REMOVE $POOL_RM_SUCCESS $POOL_MK_SUCCESS $RBD_EXISTS_ERR $RBD_WATCH $RBD_MAP $RBD_UNMAP $RBD_SHOWMAPPED get_command_output debug_msg $CLI_FLAG);
+use RbdLib qw(perform_action create_image resize_image rename_image copy_image list_image info_image export_image import_image remove_image create_snapshots protect_snapshot unprotect_snapshot clone_image rollback_snapshots purge_snapshots list_snapshots remove_snapshot rbd_map rbd_unmap rbd_showmapped display_result _pre_clean_up _post_clean_up _create_rados_pool display_ceph_os_info $RADOS_MKPOOL $RADOS_RMPOOL $RBD_CREATE $RBD_RESIZE $RBD_INFO $RBD_REMOVE $RBD_RENAME $RBD_MV $RBD_LS $RBD_LIST $RBD_CLONE $RBD_EXPORT $RBD_IMPORT $RBD_CP $RBD_COPY $SNAP_CREATE $SNAP_PROTECT $SNAP_UNPROTECT $SNAP_LS $SNAP_LIST $SNAP_ROLLBACK $SNAP_PURGE $SNAP_REMOVE $POOL_RM_SUCCESS $POOL_MK_SUCCESS $RBD_EXISTS_ERR $RBD_WATCH $RBD_MAP $RBD_UNMAP $RBD_SHOWMAPPED $RBD_CHILDREN $RBD_FLATTEN get_command_output debug_msg $CLI_FLAG);
use Pod::Usage();
use Getopt::Long();
@@ -67,9 +67,15 @@ our $snap_name = "snap1";
our $snap_name2 = "snap2";
our $snap_name3 = "snap3";
our $snap_name4 = "snap4";
+our $snap_name5 = "snap5";
+our $snap_new = "snap_new";
+our $clone_new = "clone_new";
+our $clone_new1 = "clone_new1";
+our $clone_new2 = "clone_new2";
+our $snap_test = "snap_test";
our $new_rbd_img = "new_rbd_img";
our $non_existing_img = "rbdimage";
-our $cp_new = "new";
+our $cp_new = "newest";
our $exp_file = "rbd_test_file1";
our $exp_file1 = "rbd_test_file2";
our $exp_file2 = "rbd_test_file3";
@@ -80,7 +86,7 @@ our $rbd_snap_new = "new";
our $neg_img_name = "neg_img";
our $new_img_name = "new_img";
our $max_img_name = "max_img";
-our $img_name1 = "test_img1";
+our $img_name1 = "testing_img1";
our $rbd_imp_test = "new_test_file";
our $non_pool_name = "no_pool";
our $no_snap = "no_snap";
@@ -98,12 +104,12 @@ sub create_image {
perform_action ( $RBD_CREATE, "$img_name,pool $pool_name,size 1024", 0 );
perform_action( $RBD_CREATE, "$img_name_mv,pool $pool_name,size 1024", 0 );
- perform_action( $RBD_CREATE, "$img_name1,pool $pool_name,size 0,order 22",
- 3 );
- perform_action( $RBD_CREATE, "$img_name1,pool $pool_name,size 0", 3 );
- perform_action( $RBD_CREATE, "$neg_img_name,pool $pool_name,size -1", 3 );
- perform_action( $RBD_CREATE, "$img_name1 pool $pool_name", 3 );
- perform_action( $RBD_CREATE, "--size 1024", 3 );
+ perform_action( $RBD_CREATE, "$img_name_mv,pool $pool_name,size 0,order 22",
+ 1 );
+ perform_action( $RBD_CREATE, "$img_name1,pool $pool_name,size 0", 0 );
+ perform_action( $RBD_CREATE, "$neg_img_name,pool $pool_name,size -1", 2 );
+ perform_action( $RBD_CREATE, "$img_name1 pool $pool_name", 2 );
+ perform_action( $RBD_CREATE, "--size 1024", 2 );
perform_action( $RBD_CREATE,
"$max_img_name,pool $pool_name,size 1024000000000", 0 );
perform_action( $RBD_CREATE, "$img_name1,pool $pool_name,size 2048,order",
@@ -128,6 +134,58 @@ sub create_snapshots {
0 );
perform_action( $SNAP_CREATE, "--snap $snap_name4 $pool_name\/$img_name",
0 );
+ perform_action( $SNAP_CREATE, "--snap $snap_new $pool_name\/$new_img_name",
+ 0 );
+}
+
+# Tests to protect snapshot
+sub protect_snapshot {
+ perform_action( $SNAP_PROTECT, "--snap $snap_new $pool_name\/$new_img_name",
+ 0 );
+ perform_action( $SNAP_PROTECT, "--snap $snap_new $pool_name\/$new_img_name",
+ 2 );
+ perform_action( $SNAP_PROTECT, "--snap $snap_name4 $pool_name\/$img_name",
+ 2 );
+ perform_action( $SNAP_PROTECT, "--snap $snap_test $pool_name\/$img_name",
+ 2 );
+}
+
+# Tests to unprotect snapshot
+sub unprotect_snapshot {
+ perform_action( $SNAP_UNPROTECT, "--snap $snap_new $pool_name\/$new_img_name",
+ 0 );
+ perform_action( $SNAP_UNPROTECT, "--snap $snap_new $pool_name\/$new_img_name",
+ 2 );
+ perform_action( $SNAP_UNPROTECT, "--snap $snap_name4 $pool_name\/$img_name",
+ 2 );
+ perform_action( $SNAP_UNPROTECT, "--snap $snap_test $pool_name\/$img_name",
+ 2 );
+}
+
+# clone protected snapshot
+sub clone_image {
+ perform_action( $RBD_CLONE, "$pool_name\/$new_img_name\@$snap_new $pool_name\/$clone_new",
+ 0 );
+ perform_action( $RBD_CLONE, "$pool_name\/$new_img_name\@$snap_new $pool_name\/$clone_new",
+ 1 );
+ perform_action( $RBD_CLONE, "$pool_name\/$new_img_name\@$snap_name5 $pool_name\/$clone_new1",
+ 2 );
+ perform_action( $RBD_CLONE, "$pool_name\/$img_name\@$snap_test $pool_name\/$clone_new1",
+ 2 );
+ perform_action( $RBD_CLONE, "$pool_name\/$img_name\@$snap_name5 $pool_name\/$clone_new2",
+ 2 );
+ perform_action( $RBD_CLONE, "$pool_name\/$img_name\@$snap_new",
+ 2 );
+ perform_action( $RBD_CLONE, "$pool_name\/$img_name",
+ 2 );
+}
+
+#flatten image
+sub rbd_flatten {
+ perform_action( $RBD_FLATTEN, "$pool_name\/$clone_new", 0);
+ perform_action( $RBD_FLATTEN, "$pool_name\/$clone_new", 2);
+ perform_action( $RBD_FLATTEN, "$pool_name\/$clone_new2", 2);
+ perform_action( $RBD_FLATTEN, "$pool_name\/$new_img_name", 2);
}
# Tests to rollback snapshot
@@ -144,6 +202,7 @@ sub rollback_snapshot {
sub purge_snapshots {
perform_action( $SNAP_PURGE, "$pool_name\/$img_name", 0 );
perform_action( $SNAP_PURGE, "$pool_name\/$new_rbd_img", 2 );
+ perform_action( $SNAP_PURGE, "$pool_name\/$new_img_name", 2 );
}
# Tests to list snapshots for an image
@@ -154,6 +213,7 @@ sub list_snapshots {
# Tests for remove snapshots
sub remove_snapshot {
perform_action( $SNAP_REMOVE, "$pool_name\/$img_name\@$snap_name", 0 );
+ perform_action( $SNAP_REMOVE, "$pool_name\/$new_img_name\@$snap_new", 2 );
perform_action( $SNAP_REMOVE, "$non_pool_name\/$img_name\@$snap_name3", 2 );
perform_action( $SNAP_REMOVE, "$pool_name\/$img_name\@$snap_name2", 0 );
perform_action( $SNAP_REMOVE, "$pool_name\/$non_existing_img", 2 );
@@ -208,7 +268,7 @@ sub remove_image {
sub export_image {
perform_action( $RBD_EXPORT, "$pool_name\/$img_name $exp_file", 0 );
perform_action( $RBD_EXPORT, "$pool_name\/$img_name .", 2 );
- perform_action( $RBD_EXPORT, "$pool_name\/$img_name", 2 );
+ perform_action( $RBD_EXPORT, "$pool_name\/$img_name", 0 );
perform_action( $RBD_EXPORT,
"--snap $snap_name $pool_name\/$img_name $exp_file1", 0 );
perform_action( $RBD_EXPORT,
@@ -309,12 +369,16 @@ rename_image();
resize_image();
info_image();
create_snapshots();
+protect_snapshot();
export_image();
import_image();
list_snapshots();
rollback_snapshot();
remove_snapshot();
purge_snapshots();
+clone_image();
+rbd_flatten();
+unprotect_snapshot();
copy_image();
remove_image();
display_result();
diff --git a/qa/run_xfstests_qemu.sh b/qa/run_xfstests_qemu.sh
index 449658fb5c3..08c136bdafa 100644
--- a/qa/run_xfstests_qemu.sh
+++ b/qa/run_xfstests_qemu.sh
@@ -1,7 +1,8 @@
#!/bin/bash
mkdir /tmp/cephtest
-wget https://raw.github.com/ceph/ceph/master/qa/run_xfstests.sh
+#wget https://raw.github.com/ceph/ceph/master/qa/run_xfstests.sh
+wget -O run_xfstests.sh 'https://ceph.com/git/?p=ceph.git;a=blob_plain;f=qa/run_xfstests.sh'
chmod +x run_xfstests.sh
# tests excluded fail in the current testing vm regardless of whether
# rbd is used
diff --git a/qa/workunits/cls/test_cls_lock.sh b/qa/workunits/cls/test_cls_lock.sh
index 1f767edaa59..c1452705329 100755
--- a/qa/workunits/cls/test_cls_lock.sh
+++ b/qa/workunits/cls/test_cls_lock.sh
@@ -1,5 +1,5 @@
#!/bin/sh -e
-test_cls_lock
+ceph_test_cls_lock
exit 0
diff --git a/qa/workunits/cls/test_cls_rbd.sh b/qa/workunits/cls/test_cls_rbd.sh
index 06f1421e996..b973fd0dde5 100755
--- a/qa/workunits/cls/test_cls_rbd.sh
+++ b/qa/workunits/cls/test_cls_rbd.sh
@@ -1,5 +1,5 @@
#!/bin/sh -e
-test_cls_rbd
+ceph_test_cls_rbd
exit 0
diff --git a/qa/workunits/cls/test_cls_refcount.sh b/qa/workunits/cls/test_cls_refcount.sh
index 69f721a69f9..d722f5ad930 100755
--- a/qa/workunits/cls/test_cls_refcount.sh
+++ b/qa/workunits/cls/test_cls_refcount.sh
@@ -1,5 +1,5 @@
#!/bin/sh -e
-test_cls_refcount
+ceph_test_cls_refcount
exit 0
diff --git a/qa/workunits/cls/test_cls_rgw.sh b/qa/workunits/cls/test_cls_rgw.sh
index a8998f52732..b1f6621f2a8 100755
--- a/qa/workunits/cls/test_cls_rgw.sh
+++ b/qa/workunits/cls/test_cls_rgw.sh
@@ -1,5 +1,5 @@
#!/bin/sh -e
-test_cls_rgw
+ceph_test_cls_rgw
exit 0
diff --git a/qa/workunits/direct_io/test_short_dio_read.c b/qa/workunits/direct_io/test_short_dio_read.c
index 7cc43959747..f65ce4546bd 100644
--- a/qa/workunits/direct_io/test_short_dio_read.c
+++ b/qa/workunits/direct_io/test_short_dio_read.c
@@ -3,12 +3,22 @@
#include <sys/stat.h>
#include <fcntl.h>
#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <stdlib.h>
int main()
{
char buf[409600];
- int fd = open("shortfile", O_WRONLY|O_CREAT, 0644);
ssize_t r;
+ int err;
+ int fd = open("shortfile", O_WRONLY|O_CREAT, 0644);
+
+ if (fd < 0) {
+ err = errno;
+ printf("error: open() failed with: %d (%s)\n", err, strerror(err));
+ exit(err);
+ }
printf("writing first 3 bytes of 10k file\n");
r = write(fd, "foo", 3);
@@ -18,6 +28,12 @@ int main()
printf("reading O_DIRECT\n");
fd = open("shortfile", O_RDONLY|O_DIRECT);
+ if (fd < 0) {
+ err = errno;
+ printf("error: open() failed with: %d (%s)\n", err, strerror(err));
+ exit(err);
+ }
+
r = read(fd, buf, sizeof(buf));
close(fd);
diff --git a/qa/workunits/direct_io/test_sync_io.c b/qa/workunits/direct_io/test_sync_io.c
index 613631e5fcf..59d32cf972a 100644
--- a/qa/workunits/direct_io/test_sync_io.c
+++ b/qa/workunits/direct_io/test_sync_io.c
@@ -8,6 +8,7 @@
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
+#include <errno.h>
//#include "../client/ioctl.h"
@@ -19,10 +20,15 @@ void write_pattern()
{
printf("writing pattern\n");
- int fd = open("foo", O_CREAT|O_WRONLY, 0644);
uint64_t i;
int r;
+ int fd = open("foo", O_CREAT|O_WRONLY, 0644);
+ if (fd < 0) {
+ r = errno;
+ printf("write_pattern: error: open() failed with: %d (%s)\n", r, strerror(r));
+ exit(r);
+ }
for (i=0; i<1048576 * sizeof(i); i += sizeof(i)) {
r = write(fd, &i, sizeof(i));
}
@@ -41,7 +47,6 @@ int verify_pattern(char *buf, size_t len, uint64_t off)
printf("error: offset %llu had %llu\n", (unsigned long long)expected,
(unsigned long long)actual);
exit(1);
- return -1;
}
}
return 0;
@@ -57,13 +62,33 @@ void generate_pattern(void *buf, size_t len, uint64_t offset)
verify_pattern(buf, len, offset);
}
-int read_direct(int buf_align, uint64_t offset, int len)
-{
- printf("read_direct buf_align %d offset %llu len %d\n", buf_align,
+int read_file(int buf_align, uint64_t offset, int len, int direct) {
+
+ printf("read_file buf_align %d offset %llu len %d\n", buf_align,
(unsigned long long)offset, len);
- int fd = open("foo", O_RDONLY|O_DIRECT);
void *rawbuf;
- int r = posix_memalign(&rawbuf, 4096, len + buf_align);
+ int r;
+ int flags;
+ if(direct)
+ flags = O_RDONLY|O_DIRECT;
+ else
+ flags = O_RDONLY;
+
+ int fd = open("foo", flags);
+ if (fd < 0) {
+ int err = errno;
+ printf("read_file: error: open() failed with: %d (%s)\n", err, strerror(err));
+ exit(err);
+ }
+
+ if (!direct)
+ ioctl(fd, CEPH_IOC_SYNCIO);
+
+ if ((r = posix_memalign(&rawbuf, 4096, len + buf_align)) != 0) {
+ printf("read_file: error: posix_memalign failed with %d", r);
+ exit (r);
+ }
+
void *buf = (char *)rawbuf + buf_align;
memset(buf, 0, len);
r = pread(fd, buf, len, offset);
@@ -73,32 +98,50 @@ int read_direct(int buf_align, uint64_t offset, int len)
return r;
}
+int read_direct(int buf_align, uint64_t offset, int len)
+{
+ printf("read_direct buf_align %d offset %llu len %d\n", buf_align,
+ (unsigned long long)offset, len);
+ return read_file(buf_align, offset, len, 1);
+}
+
int read_sync(int buf_align, uint64_t offset, int len)
{
printf("read_sync buf_align %d offset %llu len %d\n", buf_align,
(unsigned long long)offset, len);
- int fd = open("foo", O_RDONLY);
- ioctl(fd, CEPH_IOC_SYNCIO);
- void *rawbuf;
- int r = posix_memalign(&rawbuf, 4096, len + buf_align);
- void *buf = (char *)rawbuf + buf_align;
- memset(buf, 0, len);
- r = pread(fd, buf, len, offset);
- close(fd);
- r = verify_pattern(buf, len, offset);
- free(rawbuf);
- return r;
+ return read_file(buf_align, offset, len, 0);
}
-int write_direct(int buf_align, uint64_t offset, int len)
+int write_file(int buf_align, uint64_t offset, int len, int direct)
{
- printf("write_direct buf_align %d offset %llu len %d\n", buf_align,
+ printf("write_file buf_align %d offset %llu len %d\n", buf_align,
(unsigned long long)offset, len);
- int fd = open("foo", O_WRONLY|O_DIRECT|O_CREAT, 0644);
void *rawbuf;
- posix_memalign(&rawbuf, 4096, len + buf_align);
- void *buf = (char *)rawbuf + buf_align;
int r;
+ int err = 0;
+ int flags;
+ if (direct)
+ flags = O_WRONLY|O_DIRECT|O_CREAT;
+ else
+ flags = O_WRONLY|O_CREAT;
+
+ int fd = open("foo", flags, 0644);
+ if (fd < 0) {
+ int err = errno;
+ printf("write_file: error: open() failed with: %d (%s)\n", err, strerror(err));
+ exit(err);
+ }
+
+ if ((r = posix_memalign(&rawbuf, 4096, len + buf_align)) != 0) {
+ printf("write_file: error: posix_memalign failed with %d", r);
+ err = r;
+ goto out_close;
+ }
+
+ if (!direct)
+ ioctl(fd, CEPH_IOC_SYNCIO);
+
+ void *buf = (char *)rawbuf + buf_align;
generate_pattern(buf, len, offset);
@@ -106,46 +149,47 @@ int write_direct(int buf_align, uint64_t offset, int len)
close(fd);
fd = open("foo", O_RDONLY);
+ if (fd < 0) {
+ err = errno;
+ printf("write_file: error: open() failed with: %d (%s)\n", err, strerror(err));
+ free(rawbuf);
+ goto out_unlink;
+ }
void *buf2 = malloc(len);
+ if (!buf2) {
+ err = -ENOMEM;
+ printf("write_file: error: malloc failed\n");
+ goto out_free;
+ }
+
memset(buf2, 0, len);
r = pread(fd, buf2, len, offset);
- close(fd);
-
r = verify_pattern(buf2, len, offset);
- unlink("foo");
- free(rawbuf);
free(buf2);
+out_free:
+ free(rawbuf);
+out_close:
+ close(fd);
+out_unlink:
+ unlink("foo");
+ if (err)
+ exit(err);
return r;
}
+int write_direct(int buf_align, uint64_t offset, int len)
+{
+ printf("write_direct buf_align %d offset %llu len %d\n", buf_align,
+ (unsigned long long)offset, len);
+ return write_file (buf_align, offset, len, 1);
+}
+
int write_sync(int buf_align, uint64_t offset, int len)
{
printf("write_sync buf_align %d offset %llu len %d\n", buf_align,
(unsigned long long)offset, len);
- int fd = open("foo", O_WRONLY|O_CREAT, 0644);
- ioctl(fd, CEPH_IOC_SYNCIO);
- void *rawbuf;
- int r = posix_memalign(&rawbuf, 4096, len + buf_align);
- void *buf = (char *)rawbuf + buf_align;
-
- generate_pattern(buf, len, offset);
-
- r = pwrite(fd, buf, len, offset);
- close(fd);
-
- fd = open("foo", O_RDONLY);
- void *buf2 = malloc(len);
- memset(buf2, 0, len);
- r = pread(fd, buf2, len, offset);
- close(fd);
-
- r = verify_pattern(buf2, len, offset);
-
- unlink("foo");
- free(buf2);
- free(rawbuf);
- return r;
+ return write_file (buf_align, offset, len, 0);
}
int main(int argc, char **argv)
diff --git a/qa/workunits/hadoop-internal-tests/test.sh b/qa/workunits/hadoop-internal-tests/test.sh
index 017a0bd411b..5b84761dee4 100755
--- a/qa/workunits/hadoop-internal-tests/test.sh
+++ b/qa/workunits/hadoop-internal-tests/test.sh
@@ -1,13 +1,12 @@
#!/bin/sh -e
-BASE=/tmp/cephtest
-TLIB=binary/usr/local/lib
-
echo "starting hadoop-internal-tests tests"
-export LD_LIBRARY_PATH=$BASE/$TLIB
-command1="cd $BASE/hadoop"
-command2="ant -Dextra.library.path=$BASE/$TLIB -Dceph.conf.file=$BASE/ceph.conf test -Dtestcase=TestCephFileSystem"
+# bail if $TESTDIR is unset or empty: the hadoop directory below is located through it
+[ -z "$TESTDIR" ] && { echo "\$TESTDIR needs to be set, but is not. Exiting."; exit 1; }
+
+command1="cd $TESTDIR/hadoop"
+command2="ant -Dextra.library.path=$LD_LIBRARY_PATH -Dceph.conf.file=$CEPH_CONF -Dtestcase=TestCephFileSystem"
#print out the command
echo "----------------------"
diff --git a/qa/workunits/hadoop-wordcount/test.sh b/qa/workunits/hadoop-wordcount/test.sh
new file mode 100755
index 00000000000..256c118980a
--- /dev/null
+++ b/qa/workunits/hadoop-wordcount/test.sh
@@ -0,0 +1,47 @@
+#!/bin/sh -e
+
+echo "starting hadoop-wordcount test"
+
+# bail if $TESTDIR is unset or empty: every path used by the commands below is built from it
+[ -z "$TESTDIR" ] && { echo "\$TESTDIR needs to be set, but is not. Exiting."; exit 1; }
+
+#command1="cd $TESTDIR/hadoop"
+#command2="ant -Dextra.library.path=$LD_LIBRARY_PATH -Dceph.conf.file=$CEPH_CONF -Dtestcase=TestCephFileSystem"
+
+command0="export JAVA_HOME=/usr/lib/jvm/default-java"
+command1="mkdir -p $TESTDIR/hadoop_input"
+command2="wget http://ceph.com/qa/hadoop_input_files.tar -O $TESTDIR/hadoop_input/files.tar"
+command3="cd $TESTDIR/hadoop_input"
+command4="tar -xf $TESTDIR/hadoop_input/files.tar"
+command5="$TESTDIR/hadoop/bin/hadoop fs -mkdir wordcount_input"
+command6="$TESTDIR/hadoop/bin/hadoop fs -put $TESTDIR/hadoop_input/*txt wordcount_input/"
+command7="$TESTDIR/hadoop/bin/hadoop jar $TESTDIR/hadoop/build/hadoop-example*jar wordcount wordcount_input wordcount_output"
+command8="rm -rf $TESTDIR/hadoop_input"
+
+
+#print out the command
+echo "----------------------"
+echo $command0
+echo $command1
+echo $command2
+echo $command3
+echo $command4
+echo $command5
+echo $command6
+echo $command7
+echo $command8
+echo "----------------------"
+
+#now execute the command
+$command0
+$command1
+$command2
+$command3
+$command4
+$command5
+$command6
+$command7
+$command8
+
+echo "completed hadoop-wordcount test"
+exit 0
diff --git a/qa/workunits/libcephfs/test.sh b/qa/workunits/libcephfs/test.sh
index ddaab184750..4a501e070c3 100755
--- a/qa/workunits/libcephfs/test.sh
+++ b/qa/workunits/libcephfs/test.sh
@@ -1,5 +1,5 @@
#!/bin/sh -e
-test_libcephfs
+ceph_test_libcephfs
exit 0
diff --git a/qa/workunits/mon/crush_ops.sh b/qa/workunits/mon/crush_ops.sh
new file mode 100755
index 00000000000..735646b5ca0
--- /dev/null
+++ b/qa/workunits/mon/crush_ops.sh
@@ -0,0 +1,23 @@
+#!/bin/sh -x
+
+set -e
+
+ceph osd crush dump
+ceph osd crush rule dump
+ceph osd crush rule ls
+ceph osd crush rule list
+
+ceph osd crush rule create-simple foo default host
+ceph osd crush rule create-simple foo default host
+ceph osd crush rule create-simple bar default host
+
+ceph osd crush rule ls | grep foo
+
+ceph osd crush rule rm foo
+ceph osd crush rule rm foo # idempotent
+ceph osd crush rule rm bar
+
+# can't delete in-use rules, tho:
+ceph osd crush rule rm data && exit 1 || true
+
+echo OK
diff --git a/qa/workunits/mon/osd.sh b/qa/workunits/mon/osd.sh
index d5878b3fef7..75bf220f6bc 100755
--- a/qa/workunits/mon/osd.sh
+++ b/qa/workunits/mon/osd.sh
@@ -16,7 +16,7 @@ test $nb -ne $na
ceph osd rm $na
ceph osd rm $na
ceph osd rm $nb
-ceph osd rm 123123
+ceph osd rm 1000
na2=`ceph osd create $ua`
diff --git a/qa/workunits/mon/workloadgen.sh b/qa/workunits/mon/workloadgen.sh
index 33f76308f71..d43abe1bb10 100755
--- a/qa/workunits/mon/workloadgen.sh
+++ b/qa/workunits/mon/workloadgen.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/bin/bash -x
# vim: ts=8 sw=2 smarttab
#
# $0.sh - run mon workload generator
@@ -9,7 +9,7 @@ d() {
d "check for required binaries"
-required_bins="ceph crushtool test_mon_workloadgen"
+required_bins="ceph crushtool ceph_test_mon_workloadgen"
for b in $required_bins; do
which $b >& /dev/null
if [[ $? -ne 0 ]]; then
@@ -28,7 +28,7 @@ do_run=0
num_osds=0
# Assume the test is in PATH
-bin_test=test_mon_workloadgen
+bin_test=ceph_test_mon_workloadgen
num_osds=10
if [[ "$LOADGEN_NUM_OSDS" != "" ]]; then
@@ -38,12 +38,9 @@ fi
duration=300
[ ! -z $DURATION ] && duration=$DURATION
-extra=
-[ ! -z $TEST_CEPH_CONF ] && extra="$extra -c $TEST_CEPH_CONF"
-
d "checking osd tree"
-crush_testing_root="`ceph $extra osd tree | grep 'root[ \t]\+testing'`"
+crush_testing_root="`ceph osd tree | grep 'root[ \t]\+testing'`"
d "$crush_testing_root"
@@ -60,7 +57,7 @@ d "run_id = $run_id ; create_crush = $create_crush"
if [[ $create_crush -eq 1 ]]; then
tmp_crush_fn="/tmp/ceph.$run_id.crush"
- ceph $extra osd getcrushmap -o $tmp_crush_fn
+ ceph osd getcrushmap -o $tmp_crush_fn
crushtool -d $tmp_crush_fn -o $tmp_crush_fn.plain
highest_root_id=0
@@ -125,21 +122,21 @@ EOF
d "created crush"
- ceph $extra osd setcrushmap -i $tmp_crush_fn
+ ceph osd setcrushmap -i $tmp_crush_fn
fi
keyring="/tmp/ceph.$run_id.keyring"
-ceph $extra auth get-or-create-key osd.admin mon 'allow rwx' osd 'allow *'
-ceph $extra auth export | grep -v "export" > $keyring
+ceph auth get-or-create-key osd.admin mon 'allow rwx' osd 'allow *'
+ceph auth export | grep -v "export" > $keyring
osd_ids=""
for osd in `seq 1 $num_osds`; do
- id=`ceph $extra osd create`
+ id=`ceph osd create`
osd_ids="$osd_ids $id"
d "osd.$id"
- ceph $extra osd crush set $id osd.$id 1.0 host=testhost rack=testrack root=testing
+ ceph osd crush set $id osd.$id 1.0 host=testhost rack=testrack root=testing
done
d "osds: $osd_ids"
@@ -169,4 +166,4 @@ args="$EXTRA_ARGS --duration $duration $stub_id_args"
d "running: $args"
-$bin_test $extra --keyring $keyring $args
+$bin_test --keyring $keyring $args
diff --git a/qa/workunits/osdc/stress_objectcacher.sh b/qa/workunits/osdc/stress_objectcacher.sh
index 03a5c952e01..e6b9ec121ea 100755
--- a/qa/workunits/osdc/stress_objectcacher.sh
+++ b/qa/workunits/osdc/stress_objectcacher.sh
@@ -14,7 +14,7 @@ do
do
for MAX_DIRTY in 0 25165824
do
- test_objectcacher_stress --ops $OPS --percent-read $READS --delay-ns $DELAY --objects $OBJECTS --max-op-size $OP_SIZE --client-oc-max-dirty $MAX_DIRTY > /dev/null 2>&1
+ ceph_test_objectcacher_stress --ops $OPS --percent-read $READS --delay-ns $DELAY --objects $OBJECTS --max-op-size $OP_SIZE --client-oc-max-dirty $MAX_DIRTY > /dev/null 2>&1
done
done
done
diff --git a/qa/workunits/rados/stress_watch.sh b/qa/workunits/rados/stress_watch.sh
index d547207ce57..275414b26ed 100755
--- a/qa/workunits/rados/stress_watch.sh
+++ b/qa/workunits/rados/stress_watch.sh
@@ -1,5 +1,5 @@
#!/bin/sh -e
-test_stress_watch
+ceph_test_stress_watch
exit 0
diff --git a/qa/workunits/rados/test.sh b/qa/workunits/rados/test.sh
index 1671a9039d1..b18519ab34b 100755
--- a/qa/workunits/rados/test.sh
+++ b/qa/workunits/rados/test.sh
@@ -1,17 +1,17 @@
#!/bin/sh -e
-test_rados_api_aio
-test_rados_api_io
-test_rados_api_list
-test_rados_api_misc
-test_rados_api_pool
-test_rados_api_snapshots
-test_rados_api_stat
-test_rados_api_watch_notify
+ceph_test_rados_api_aio
+ceph_test_rados_api_io
+ceph_test_rados_api_list
+ceph_test_rados_api_misc
+ceph_test_rados_api_pool
+ceph_test_rados_api_snapshots
+ceph_test_rados_api_stat
+ceph_test_rados_api_watch_notify
-testrados_list_parallel
-testrados_open_pools_parallel
-testrados_delete_pools_parallel
-testrados_watch_notify
+ceph_test_rados_list_parallel
+ceph_test_rados_open_pools_parallel
+ceph_test_rados_delete_pools_parallel
+ceph_test_rados_watch_notify
exit 0
diff --git a/qa/workunits/rados/test_python.sh b/qa/workunits/rados/test_python.sh
index 7678cba863b..39595fe3329 100755
--- a/qa/workunits/rados/test_python.sh
+++ b/qa/workunits/rados/test_python.sh
@@ -1,6 +1,8 @@
#!/bin/sh -ex
CEPH_REF=${CEPH_REF:-master}
-wget -q https://raw.github.com/ceph/ceph/$CEPH_REF/src/test/pybind/test_rados.py
+# fetch the test from gitweb: try $CEPH_REF as given first, then as a branch name under refs/heads/
+wget -O test_rados.py "https://ceph.com/git/?p=ceph.git;a=blob_plain;hb=$CEPH_REF;f=src/test/pybind/test_rados.py" || \
+ wget -O test_rados.py "https://ceph.com/git/?p=ceph.git;a=blob_plain;hb=refs/heads/$CEPH_REF;f=src/test/pybind/test_rados.py"
nosetests -v test_rados
exit 0
diff --git a/qa/workunits/rbd/map-snapshot-io.sh b/qa/workunits/rbd/map-snapshot-io.sh
index b53a81387d4..c16030e2d8e 100755
--- a/qa/workunits/rbd/map-snapshot-io.sh
+++ b/qa/workunits/rbd/map-snapshot-io.sh
@@ -19,6 +19,7 @@ dd if=/dev/zero of=/dev/rbd/rbd/image oflag=direct count=10
udevadm settle # udev does blkid on device close; yeesh! see #4183
rbd unmap /dev/rbd/rbd/image
+rbd rm image
# wait a few seconds for the async kernel bits to clean themselves up
sleep 4
diff --git a/qa/workunits/rbd/run_cli_tests.sh b/qa/workunits/rbd/run_cli_tests.sh
new file mode 100755
index 00000000000..d628109c3ae
--- /dev/null
+++ b/qa/workunits/rbd/run_cli_tests.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+
+wget -q http://ceph.com/qa/rbd_cli_tests.pls
+wget -q http://ceph.com/qa/RbdLib.pm
+perl rbd_cli_tests.pls --pool test
+exit 0
+
diff --git a/qa/workunits/rbd/smalliobench.sh b/qa/workunits/rbd/smalliobench.sh
index 5cedc78e768..f25fae43bc9 100755
--- a/qa/workunits/rbd/smalliobench.sh
+++ b/qa/workunits/rbd/smalliobench.sh
@@ -10,7 +10,7 @@ DUR="$3"
for n in `seq 1 $NUM`; do
echo "Starting $n of $NUM ..."
- smalliobenchrbd --pool rbd --duration $DUR --disable-detailed-ops 1 &
+ ceph_smalliobenchrbd --pool rbd --duration $DUR --disable-detailed-ops 1 &
sleep $GAP
done
echo "Waiting..."
diff --git a/qa/workunits/rbd/test_librbd.sh b/qa/workunits/rbd/test_librbd.sh
index 6212357e18e..d35cfafb159 100755
--- a/qa/workunits/rbd/test_librbd.sh
+++ b/qa/workunits/rbd/test_librbd.sh
@@ -1,5 +1,5 @@
#!/bin/sh -e
-test_librbd
+ceph_test_librbd
exit 0
diff --git a/qa/workunits/rbd/test_librbd_python.sh b/qa/workunits/rbd/test_librbd_python.sh
index f3b2a47e4de..e975d17f503 100755
--- a/qa/workunits/rbd/test_librbd_python.sh
+++ b/qa/workunits/rbd/test_librbd_python.sh
@@ -1,6 +1,8 @@
#!/bin/sh -ex
CEPH_REF=${CEPH_REF:-master}
-wget -q https://raw.github.com/ceph/ceph/$CEPH_REF/src/test/pybind/test_rbd.py
+# fetch the test from gitweb: try $CEPH_REF as given first, then as a branch name under refs/heads/
+wget -O test_rbd.py "https://ceph.com/git/?p=ceph.git;a=blob_plain;hb=$CEPH_REF;f=src/test/pybind/test_rbd.py" || \
+ wget -O test_rbd.py "https://ceph.com/git/?p=ceph.git;a=blob_plain;hb=refs/heads/$CEPH_REF;f=src/test/pybind/test_rbd.py"
nosetests -v -e '.*test_remove_with_watcher' test_rbd
exit 0
diff --git a/src/.gitignore b/src/.gitignore
index f05c939cbc7..3db14b83554 100644
--- a/src/.gitignore
+++ b/src/.gitignore
@@ -1,120 +1,65 @@
+# generic entries
+Makefile
+
+# local directory specific entries
+/.git_version
+/.libs
+/acconfig.h*
+/ceph
+/cephfs
+/crushtool
+/ceph-authtool
+/ceph-conf
+/ceph-coverage
+/ceph-debugpack
+/ceph-dencoder
/ceph-fuse
/ceph-mds
/ceph-mon
-/ceph
/ceph-osd
/ceph-syn
-/ceph-dencoder
-/dupstore
-/fakefuse
-/fakesyn
-/mkmonmap
-/monmaptool
-/newsyn
-/authtool
-/ceph-authtool
-/crushtool
+/ceph.conf
+/ceph_bench_log
+/ceph_dupstore
+/ceph_filestore_dump
+/ceph_multi_stress_watch
+/ceph_psim
+/ceph_radosacl
+/ceph_rgw_jsonparser
+/ceph_rgw_multiparser
+/ceph_scratchtool
+/ceph_scratchtoolpp
+/ceph_streamtest
+/ceph_test_*
+/ceph_tpbench
+/ceph_xattr_bench
+/ceph_kvstorebench
+/ceph_omapbench
+/ceph_smalliobench
+/ceph_smalliobenchdumb
+/ceph_smalliobenchfs
+/ceph_smalliobenchrbd
+/ceph_ver.h
+/dev
+/init-ceph
+/keyring
+/librados-config
+/massif.out.*
/mkcephfs
+/mnt
+/monmaptool
/mount.ceph
/osdmaptool
+/out
/rados
-/rados_sync
-/radosacl
/radosgw
/radosgw-admin
-/rbdtool
-/rgw_jsonparser
-/rgw_multiparser
-/streamtest
-/bench_log
-/test_ioctls
-/test_trans
-/testceph
-/testcrypto
-/testkeys
-/testmsgr
-/testrados
-/testrados_delete_pool_while_open
-/testrados_watch_notify
-/testradospp
-/testdout_streambuf
-/testsignal_handlers
-/testtimers
-/test_addrs
-/test_libceph_build
-/test_librados_build
-/test_librgw_build
-/testrados
-/test_str_list
-/test_stress_watch
-/multi_stress_watch
-/test_store
-/test_libcommon_build
-/test_mutate
-/fsconverter
-/xattr_bench
-/rest-bench
-/rbd-fuse
-dev
-mondata
-mnt
-TAGS
-tags
-out
-acconfig.h.in
-acconfig.h
-/.libs
-
-*.so
-/crush/*.fpicco
-/CrushWrapper.pm
-/crush/CrushWrapper_wrap.cxx
-
-/.git_version
-/ceph-conf
-/ceph-debugpack
-/cephfs
-/ceph-coverage
-/dumpjournal
-/init-ceph
-/librados-config
/rbd
-/psim
+/rbd-fuse
+/rest-bench
/sample.fetch_config
-
-Makefile
-
-/gtest/build-aux/config.h
-/gtest/build-aux/config.h.in
-/gtest/lib/
-/gtest/scripts/gtest-config
-/gtest/src/.dirstamp
-/gtest/Makefile.in
-/gtest/aclocal.m4
-/gtest/configure
-/gtest/fused-src/
+/TAGS
+/tags
+/testmsgr
+/test_*
/unittest_*
-/ceph.conf
-/keyring
-/massif.out.*
-/testrados_list_parallel
-/testrados_open_pools_parallel
-/testrados_delete_pools_parallel
-/test_rados_api_aio
-/test_rados_api_io
-/test_rados_api_list
-/test_rados_api_pool
-/test_rados_api_stat
-/test_rados_api_watch_notify
-/test_rados_api_snapshots
-/test_rados_api_misc
-/test_librbd
-/test_librbd_fsx
-/scratchtool
-/scratchtoolpp
-/ceph-filestore-dump
-/smalliobench
-/smalliobenchdumb
-/smalliobenchfs
-/smalliobenchrbd
-/tpbench
diff --git a/src/Makefile.am b/src/Makefile.am
index efff334e045..17255882666 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -19,7 +19,8 @@ EXTRA_DIST = \
libs3/libs3.spec \
libs3/mswin \
libs3/src \
- libs3/test
+ libs3/test \
+ unittest_bufferlist.sh
CLEANFILES =
bin_PROGRAMS =
@@ -38,7 +39,7 @@ check_PROGRAMS =
# tests to actually run on "make check"; if you need extra, non-test,
# executables built, you need to replace this with manual assignments
# target by target
-TESTS = $(check_PROGRAMS)
+TESTS = $(check_PROGRAMS) unittest_bufferlist.sh
check-local:
$(srcdir)/test/encoding/check-generated.sh
@@ -115,7 +116,7 @@ ceph_filestore_dump_LDADD = libosd.a $(LIBOS_LDA) $(LIBGLOBAL_LDA) -lboost_progr
if LINUX
ceph_filestore_dump_LDADD += -ldl
endif
-bin_PROGRAMS += ceph ceph-conf ceph-authtool ceph-filestore-dump
+bin_PROGRAMS += ceph ceph-conf ceph-authtool ceph_filestore_dump
monmaptool_SOURCES = monmaptool.cc
monmaptool_LDADD = $(LIBGLOBAL_LDA)
@@ -128,9 +129,9 @@ bin_PROGRAMS += monmaptool crushtool osdmaptool
rgw_dencoder_src = rgw/rgw_dencoder.cc \
rgw/rgw_acl.cc
-ceph_dencoder_SOURCES = test/encoding/ceph_dencoder.cc ${rgw_dencoder_src}
+ceph_dencoder_SOURCES = test/encoding/ceph_dencoder.cc ${rgw_dencoder_src} perfglue/disabled_heap_profiler.cc
ceph_dencoder_CXXFLAGS = ${AM_CXXFLAGS}
-ceph_dencoder_LDADD = $(LIBGLOBAL_LDA) libcls_lock_client.a libcls_rgw_client.a libosd.a libmds.a $(LIBOS_LDA) libmon.a
+ceph_dencoder_LDADD = $(LIBGLOBAL_LDA) libcls_lock_client.a libcls_rgw_client.a libosd.a libmds.a libosdc.la $(LIBOS_LDA) libmon.a
bin_PROGRAMS += ceph-dencoder
mount_ceph_SOURCES = mount/mount.ceph.c common/armor.c common/safe_io.c common/secret.c include/addr_parsing.c
@@ -145,7 +146,7 @@ cephfs_LDADD = libcommon.la
bin_PROGRAMS += cephfs
librados_config_SOURCES = librados-config.cc
-librados_config_LDADD = libglobal.la librados.la $(EXTRALIBS) $(CRYPTO_LIBS)
+librados_config_LDADD = libglobal.la librados.la $(PTHREAD_LIBS) $(EXTRALIBS) $(CRYPTO_LIBS)
bin_PROGRAMS += librados-config
# synthetic client
@@ -198,106 +199,106 @@ ceph_mon_SOURCES += perfglue/disabled_heap_profiler.cc
endif # WITH_TCMALLOC
# debug targets
-psim_SOURCES = psim.cc
-psim_LDADD = $(LIBGLOBAL_LDA)
-bin_DEBUGPROGRAMS += psim
-
-test_mutate_SOURCES = test/test_mutate.cc
-test_mutate_LDADD = libglobal.la librados.la $(PTHREAD_LIBS) -lm $(CRYPTO_LIBS) $(EXTRALIBS)
-bin_DEBUGPROGRAMS += test_mutate
-
-test_rewrite_latency_SOURCES = test/test_rewrite_latency.cc
-test_rewrite_latency_LDADD = libcommon.la $(PTHREAD_LIBS) -lm $(CRYPTO_LIBS) $(EXTRALIBS)
-bin_DEBUGPROGRAMS += test_rewrite_latency
-
-testmsgr_SOURCES = testmsgr.cc
-testmsgr_LDADD = $(LIBGLOBAL_LDA)
-bin_DEBUGPROGRAMS += testmsgr
-
-test_ioctls_SOURCES = client/test_ioctls.c
-bin_DEBUGPROGRAMS += test_ioctls
-
-dupstore_SOURCES = dupstore.cc
-dupstore_CXXFLAGS= ${CRYPTO_CXXFLAGS} ${AM_CXXFLAGS}
-dupstore_LDADD = $(LIBOS_LDA) $(LIBGLOBAL_LDA)
-streamtest_SOURCES = streamtest.cc
-streamtest_CXXFLAGS= ${CRYPTO_CXXFLAGS} ${AM_CXXFLAGS}
-streamtest_LDADD = $(LIBOS_LDA) $(LIBGLOBAL_LDA)
-bin_DEBUGPROGRAMS += dupstore streamtest
-
-test_trans_SOURCES = test_trans.cc
-test_trans_CXXFLAGS= ${CRYPTO_CXXFLAGS} ${AM_CXXFLAGS}
-test_trans_LDADD = $(LIBOS_LDA) $(LIBGLOBAL_LDA)
-bin_DEBUGPROGRAMS += test_trans
-
-testrados_SOURCES = test/osd/TestRados.cc test/osd/TestOpStat.cc test/osd/Object.cc test/osd/RadosModel.cc
-testrados_LDADD = librados.la $(LIBGLOBAL_LDA)
-bin_DEBUGPROGRAMS += testrados
-
-smalliobench_SOURCES = test/bench/small_io_bench.cc test/bench/rados_backend.cc test/bench/detailed_stat_collector.cc test/bench/bencher.cc
-smalliobench_LDADD = librados.la -lboost_program_options $(LIBGLOBAL_LDA)
-bin_DEBUGPROGRAMS += smalliobench
-
-smalliobenchfs_SOURCES = test/bench/small_io_bench_fs.cc test/bench/filestore_backend.cc test/bench/detailed_stat_collector.cc test/bench/bencher.cc
-smalliobenchfs_LDADD = librados.la -lboost_program_options $(LIBOS_LDA) $(LIBGLOBAL_LDA)
-smalliobenchfs_CXXFLAGS = ${CRYPTO_CXXFLAGS} ${AM_CXXFLAGS}
-bin_DEBUGPROGRAMS += smalliobenchfs
-
-smalliobenchdumb_SOURCES = test/bench/small_io_bench_dumb.cc test/bench/dumb_backend.cc test/bench/detailed_stat_collector.cc test/bench/bencher.cc
-smalliobenchdumb_LDADD = librados.la -lboost_program_options $(LIBOS_LDA) $(LIBGLOBAL_LDA)
-smalliobenchdumb_CXXFLAGS = ${CRYPTO_CXXFLAGS} ${AM_CXXFLAGS}
-bin_DEBUGPROGRAMS += smalliobenchdumb
-
-smalliobenchrbd_SOURCES = test/bench/small_io_bench_rbd.cc test/bench/rbd_backend.cc test/bench/detailed_stat_collector.cc test/bench/bencher.cc
-smalliobenchrbd_LDADD = librados.la librbd.la -lboost_program_options $(LIBGLOBAL_LDA)
-bin_DEBUGPROGRAMS += smalliobenchrbd
-
-tpbench_SOURCES = test/bench/tp_bench.cc test/bench/detailed_stat_collector.cc
-tpbench_LDADD = librados.la -lboost_program_options $(LIBOS_LDA) $(LIBGLOBAL_LDA)
-bin_DEBUGPROGRAMS += tpbench
-
-omapbench_SOURCES = test/omap_bench.cc
-omapbench_LDADD = librados.la $(LIBGLOBAL_LDA)
-bin_DEBUGPROGRAMS += omapbench
-
-kvstorebench_SOURCES = test/kv_store_bench.cc key_value_store/kv_flat_btree_async.cc
-kvstorebench_LDADD = librados.la $(LIBGLOBAL_LDA)
-bin_DEBUGPROGRAMS += kvstorebench
-
-multi_stress_watch_SOURCES = test/multi_stress_watch.cc test/librados/test.cc
-multi_stress_watch_LDADD = librados.la $(LIBGLOBAL_LDA)
-bin_DEBUGPROGRAMS += multi_stress_watch
+ceph_psim_SOURCES = psim.cc
+ceph_psim_LDADD = $(LIBGLOBAL_LDA)
+bin_DEBUGPROGRAMS += ceph_psim
+
+ceph_test_mutate_SOURCES = test/test_mutate.cc
+ceph_test_mutate_LDADD = libglobal.la librados.la $(PTHREAD_LIBS) -lm $(CRYPTO_LIBS) $(EXTRALIBS)
+bin_DEBUGPROGRAMS += ceph_test_mutate
+
+ceph_test_rewrite_latency_SOURCES = test/test_rewrite_latency.cc
+ceph_test_rewrite_latency_LDADD = libcommon.la $(PTHREAD_LIBS) -lm $(CRYPTO_LIBS) $(EXTRALIBS)
+bin_DEBUGPROGRAMS += ceph_test_rewrite_latency
+
+ceph_test_msgr_SOURCES = testmsgr.cc
+ceph_test_msgr_LDADD = $(LIBGLOBAL_LDA)
+bin_DEBUGPROGRAMS += ceph_test_msgr
+
+ceph_test_ioctls_SOURCES = client/test_ioctls.c
+bin_DEBUGPROGRAMS += ceph_test_ioctls
+
+ceph_dupstore_SOURCES = dupstore.cc
+ceph_dupstore_CXXFLAGS= ${CRYPTO_CXXFLAGS} ${AM_CXXFLAGS}
+ceph_dupstore_LDADD = $(LIBOS_LDA) $(LIBGLOBAL_LDA)
+ceph_streamtest_SOURCES = streamtest.cc
+ceph_streamtest_CXXFLAGS= ${CRYPTO_CXXFLAGS} ${AM_CXXFLAGS}
+ceph_streamtest_LDADD = $(LIBOS_LDA) $(LIBGLOBAL_LDA)
+bin_DEBUGPROGRAMS += ceph_dupstore ceph_streamtest
+
+ceph_test_trans_SOURCES = test_trans.cc
+ceph_test_trans_CXXFLAGS= ${CRYPTO_CXXFLAGS} ${AM_CXXFLAGS}
+ceph_test_trans_LDADD = $(LIBOS_LDA) $(LIBGLOBAL_LDA)
+bin_DEBUGPROGRAMS += ceph_test_trans
+
+ceph_test_rados_SOURCES = test/osd/TestRados.cc test/osd/TestOpStat.cc test/osd/Object.cc test/osd/RadosModel.cc
+ceph_test_rados_LDADD = librados.la $(LIBGLOBAL_LDA)
+bin_DEBUGPROGRAMS += ceph_test_rados
+
+ceph_smalliobench_SOURCES = test/bench/small_io_bench.cc test/bench/rados_backend.cc test/bench/detailed_stat_collector.cc test/bench/bencher.cc
+ceph_smalliobench_LDADD = librados.la -lboost_program_options $(LIBGLOBAL_LDA)
+bin_DEBUGPROGRAMS += ceph_smalliobench
+
+ceph_smalliobenchfs_SOURCES = test/bench/small_io_bench_fs.cc test/bench/filestore_backend.cc test/bench/detailed_stat_collector.cc test/bench/bencher.cc
+ceph_smalliobenchfs_LDADD = librados.la -lboost_program_options $(LIBOS_LDA) $(LIBGLOBAL_LDA)
+ceph_smalliobenchfs_CXXFLAGS = ${CRYPTO_CXXFLAGS} ${AM_CXXFLAGS}
+bin_DEBUGPROGRAMS += ceph_smalliobenchfs
+
+ceph_smalliobenchdumb_SOURCES = test/bench/small_io_bench_dumb.cc test/bench/dumb_backend.cc test/bench/detailed_stat_collector.cc test/bench/bencher.cc
+ceph_smalliobenchdumb_LDADD = librados.la -lboost_program_options $(LIBOS_LDA) $(LIBGLOBAL_LDA)
+ceph_smalliobenchdumb_CXXFLAGS = ${CRYPTO_CXXFLAGS} ${AM_CXXFLAGS}
+bin_DEBUGPROGRAMS += ceph_smalliobenchdumb
+
+ceph_smalliobenchrbd_SOURCES = test/bench/small_io_bench_rbd.cc test/bench/rbd_backend.cc test/bench/detailed_stat_collector.cc test/bench/bencher.cc
+ceph_smalliobenchrbd_LDADD = librados.la librbd.la -lboost_program_options $(LIBGLOBAL_LDA)
+bin_DEBUGPROGRAMS += ceph_smalliobenchrbd
+
+ceph_tpbench_SOURCES = test/bench/tp_bench.cc test/bench/detailed_stat_collector.cc
+ceph_tpbench_LDADD = librados.la -lboost_program_options $(LIBOS_LDA) $(LIBGLOBAL_LDA)
+bin_DEBUGPROGRAMS += ceph_tpbench
+
+ceph_omapbench_SOURCES = test/omap_bench.cc
+ceph_omapbench_LDADD = librados.la $(LIBGLOBAL_LDA)
+bin_DEBUGPROGRAMS += ceph_omapbench
+
+ceph_kvstorebench_SOURCES = test/kv_store_bench.cc key_value_store/kv_flat_btree_async.cc
+ceph_kvstorebench_LDADD = librados.la $(LIBGLOBAL_LDA)
+bin_DEBUGPROGRAMS += ceph_kvstorebench
+
+ceph_multi_stress_watch_SOURCES = test/multi_stress_watch.cc test/librados/test.cc
+ceph_multi_stress_watch_LDADD = librados.la $(LIBGLOBAL_LDA)
+bin_DEBUGPROGRAMS += ceph_multi_stress_watch
if WITH_BUILD_TESTS
-test_libcommon_build_SOURCES = test/test_libcommon_build.cc $(libcommon_files)
-test_libcommon_build_LDADD = $(PTHREAD_LIBS) -lm $(CRYPTO_LIBS) $(EXTRALIBS)
-bin_DEBUGPROGRAMS += test_libcommon_build
+ceph_test_libcommon_build_SOURCES = test/test_libcommon_build.cc $(libcommon_files)
+ceph_test_libcommon_build_LDADD = $(PTHREAD_LIBS) -lm $(CRYPTO_LIBS) $(EXTRALIBS)
+bin_DEBUGPROGRAMS += ceph_test_libcommon_build
-test_librados_build_SOURCES = test/test_libcommon_build.cc $(libcommon_files) $(librados_SOURCES)
-test_librados_build_LDADD = $(PTHREAD_LIBS) -lm $(CRYPTO_LIBS) $(EXTRALIBS)
-test_librados_build_CXXFLAGS = $(AM_CXXFLAGS)
-bin_DEBUGPROGRAMS += test_librados_build
+ceph_test_librados_build_SOURCES = test/test_libcommon_build.cc $(libcommon_files) $(librados_SOURCES)
+ceph_test_librados_build_LDADD = $(PTHREAD_LIBS) -lm $(CRYPTO_LIBS) $(EXTRALIBS)
+ceph_test_librados_build_CXXFLAGS = $(AM_CXXFLAGS)
+bin_DEBUGPROGRAMS += ceph_test_librados_build
-test_librgw_build_SOURCES = test/test_libcommon_build.cc $(libcommon_files) \
+ceph_test_librgw_build_SOURCES = test/test_libcommon_build.cc $(libcommon_files) \
$(librados_SOURCES) $(librgw_la_SOURCES)
-test_librgw_build_LDADD = -lexpat $(PTHREAD_LIBS) -lm $(CRYPTO_LIBS) $(EXTRALIBS)
-test_librgw_build_CXXFLAGS = $(AM_CXXFLAGS)
-bin_DEBUGPROGRAMS += test_librgw_build
+ceph_test_librgw_build_LDADD = -lexpat $(PTHREAD_LIBS) -lm $(CRYPTO_LIBS) $(EXTRALIBS)
+ceph_test_librgw_build_CXXFLAGS = $(AM_CXXFLAGS)
+bin_DEBUGPROGRAMS += ceph_test_librgw_build
-test_libcephfs_build_SOURCES = test/test_libcommon_build.cc $(libcommon_files) \
+ceph_test_libcephfs_build_SOURCES = test/test_libcommon_build.cc $(libcommon_files) \
$(libosdc_la_SOURCES)
-test_libcephfs_build_LDADD = libcephfs.la -lexpat $(PTHREAD_LIBS) -lm $(CRYPTO_LIBS) $(EXTRALIBS)
-test_libcephfs_build_CXXFLAGS = $(AM_CXXFLAGS)
-bin_DEBUGPROGRAMS += test_libcephfs_build
+ceph_test_libcephfs_build_LDADD = libcephfs.la -lexpat $(PTHREAD_LIBS) -lm $(CRYPTO_LIBS) $(EXTRALIBS)
+ceph_test_libcephfs_build_CXXFLAGS = $(AM_CXXFLAGS)
+bin_DEBUGPROGRAMS += ceph_test_libcephfs_build
endif
if WITH_HADOOPCLIENT
-test_libhadoopcephfs_build_SOURCES = test/test_libcommon_build.cc \
+ceph_test_libhadoopcephfs_build_SOURCES = test/test_libcommon_build.cc \
$(libhadoopcephfs_la_SOURCES) \
$(libosdc_la_SOURCES) $(libcommon_files)
-test_libhadoopcephfs_build_LDADD = libcephfs.la -lexpat $(PTHREAD_LIBS) -lm $(CRYPTO_LIBS) $(EXTRALIBS)
-test_libhadoopcephfs_build_CXXFLAGS = $(AM_CXXFLAGS)
-bin_DEBUGPROGRAMS += test_libhadoopcephfs_build
+ceph_test_libhadoopcephfs_build_LDADD = libcephfs.la -lexpat $(PTHREAD_LIBS) -lm $(CRYPTO_LIBS) $(EXTRALIBS)
+ceph_test_libhadoopcephfs_build_CXXFLAGS = $(AM_CXXFLAGS)
+bin_DEBUGPROGRAMS += ceph_test_libhadoopcephfs_build
endif
##########
@@ -316,13 +317,13 @@ libcephfs_la_LDFLAGS = $(PTHREAD_LIBS) $(CRYPTO_LIBS) $(EXTRALIBS) \
${AM_LDFLAGS} -version-info 1:0:0 -export-symbols-regex '^ceph_.*'
lib_LTLIBRARIES += libcephfs.la
-testtimers_SOURCES = test/TestTimers.cc
-testtimers_LDADD = $(LIBGLOBAL_LDA)
-bin_DEBUGPROGRAMS += testtimers
+ceph_test_timers_SOURCES = test/TestTimers.cc
+ceph_test_timers_LDADD = $(LIBGLOBAL_LDA)
+bin_DEBUGPROGRAMS += ceph_test_timers
-testsignal_handlers_SOURCES = test/TestSignalHandlers.cc
-testsignal_handlers_LDADD = $(LIBGLOBAL_LDA)
-bin_DEBUGPROGRAMS += testsignal_handlers
+ceph_test_signal_handlers_SOURCES = test/TestSignalHandlers.cc
+ceph_test_signal_handlers_LDADD = $(LIBGLOBAL_LDA)
+bin_DEBUGPROGRAMS += ceph_test_signal_handlers
# librados
librados_SOURCES = \
@@ -392,15 +393,15 @@ radosgw_admin_CXXFLAGS = ${AM_CXXFLAGS}
radosgw_admin_LDADD = $(my_radosgw_ldadd)
bin_PROGRAMS += radosgw-admin
-rgw_multiparser_SOURCES = rgw/rgw_multiparser.cc
-rgw_multiparser_CXXFLAGS = ${AM_CXXFLAGS}
-rgw_multiparser_LDADD = $(my_radosgw_ldadd)
-bin_DEBUGPROGRAMS += rgw_multiparser
+ceph_rgw_multiparser_SOURCES = rgw/rgw_multiparser.cc
+ceph_rgw_multiparser_CXXFLAGS = ${AM_CXXFLAGS}
+ceph_rgw_multiparser_LDADD = $(my_radosgw_ldadd)
+bin_DEBUGPROGRAMS += ceph_rgw_multiparser
-rgw_jsonparser_SOURCES = rgw/rgw_jsonparser.cc
-rgw_jsonparser_CXXFLAGS = ${CRYPTO_CXXFLAGS} ${AM_CXXFLAGS}
-rgw_jsonparser_LDADD = $(my_radosgw_ldadd)
-bin_DEBUGPROGRAMS += rgw_jsonparser
+ceph_rgw_jsonparser_SOURCES = rgw/rgw_jsonparser.cc
+ceph_rgw_jsonparser_CXXFLAGS = ${CRYPTO_CXXFLAGS} ${AM_CXXFLAGS}
+ceph_rgw_jsonparser_LDADD = $(my_radosgw_ldadd)
+bin_DEBUGPROGRAMS += ceph_rgw_jsonparser
endif
@@ -448,13 +449,13 @@ endif
endif
-scratchtool_SOURCES = scratchtool.c
-scratchtool_LDADD = librados.la $(PTHREAD_LIBS) -lm $(CRYPTO_LIBS) $(EXTRALIBS)
-scratchtoolpp_SOURCES = scratchtoolpp.cc
-scratchtoolpp_LDADD = librados.la $(PTHREAD_LIBS) -lm
-radosacl_SOURCES = radosacl.cc
-radosacl_LDADD = librados.la $(PTHREAD_LIBS) -lm $(CRYPTO_LIBS) $(EXTRALIBS)
-bin_DEBUGPROGRAMS += scratchtool scratchtoolpp radosacl
+ceph_scratchtool_SOURCES = scratchtool.c
+ceph_scratchtool_LDADD = librados.la $(PTHREAD_LIBS) -lm $(CRYPTO_LIBS) $(EXTRALIBS)
+ceph_scratchtoolpp_SOURCES = scratchtoolpp.cc
+ceph_scratchtoolpp_LDADD = librados.la $(PTHREAD_LIBS) -lm
+ceph_radosacl_SOURCES = radosacl.cc
+ceph_radosacl_LDADD = librados.la $(PTHREAD_LIBS) -lm $(CRYPTO_LIBS) $(EXTRALIBS)
+bin_DEBUGPROGRAMS += ceph_scratchtool ceph_scratchtoolpp ceph_radosacl
rbd_SOURCES = rbd.cc common/fiemap.cc common/secret.c common/TextTable.cc common/util.cc
rbd_CXXFLAGS = ${AM_CXXFLAGS}
@@ -464,20 +465,20 @@ bin_PROGRAMS += rbd
endif
-testcrypto_SOURCES = testcrypto.cc
-testcrypto_LDADD = $(LIBGLOBAL_LDA)
-testcrypto_CXXFLAGS = ${AM_CXXFLAGS}
-bin_DEBUGPROGRAMS += testcrypto
+ceph_test_crypto_SOURCES = testcrypto.cc
+ceph_test_crypto_LDADD = $(LIBGLOBAL_LDA)
+ceph_test_crypto_CXXFLAGS = ${AM_CXXFLAGS}
+bin_DEBUGPROGRAMS += ceph_test_crypto
-testkeys_SOURCES = testkeys.cc
-testkeys_LDADD = libmon.a $(LIBGLOBAL_LDA)
-testkeys_CXXFLAGS = ${AM_CXXFLAGS}
-bin_DEBUGPROGRAMS += testkeys
+ceph_test_keys_SOURCES = testkeys.cc
+ceph_test_keys_LDADD = libmon.a $(LIBGLOBAL_LDA)
+ceph_test_keys_CXXFLAGS = ${AM_CXXFLAGS}
+bin_DEBUGPROGRAMS += ceph_test_keys
if WITH_TCMALLOC
-testkeys_LDADD += -ltcmalloc
-testkeys_CXXFLAGS += ${tcmalloc_safety_flags}
-testkeys_SOURCES += perfglue/heap_profiler.cc
+ceph_test_keys_LDADD += -ltcmalloc
+ceph_test_keys_CXXFLAGS += ${tcmalloc_safety_flags}
+ceph_test_keys_SOURCES += perfglue/heap_profiler.cc
endif
@@ -583,41 +584,41 @@ libsystest_la_SOURCES = \
libsystest_la_LIBADD = libglobal.la
noinst_LTLIBRARIES += libsystest.la
-testrados_list_parallel_SOURCES = \
+ceph_test_rados_list_parallel_SOURCES = \
test/system/rados_list_parallel.cc \
test/system/st_rados_create_pool.cc \
test/system/st_rados_list_objects.cc
-testrados_list_parallel_LDADD = libsystest.la librados.la
-bin_DEBUGPROGRAMS += testrados_list_parallel
+ceph_test_rados_list_parallel_LDADD = libsystest.la librados.la
+bin_DEBUGPROGRAMS += ceph_test_rados_list_parallel
-testrados_open_pools_parallel_SOURCES = \
+ceph_test_rados_open_pools_parallel_SOURCES = \
test/system/rados_open_pools_parallel.cc \
test/system/st_rados_create_pool.cc
-testrados_open_pools_parallel_LDADD = libsystest.la librados.la
-bin_DEBUGPROGRAMS += testrados_open_pools_parallel
+ceph_test_rados_open_pools_parallel_LDADD = libsystest.la librados.la
+bin_DEBUGPROGRAMS += ceph_test_rados_open_pools_parallel
-testrados_delete_pools_parallel_SOURCES = \
+ceph_test_rados_delete_pools_parallel_SOURCES = \
test/system/rados_delete_pools_parallel.cc \
test/system/st_rados_create_pool.cc \
test/system/st_rados_delete_pool.cc \
test/system/st_rados_list_objects.cc
-testrados_delete_pools_parallel_LDADD = libsystest.la librados.la
-bin_DEBUGPROGRAMS += testrados_delete_pools_parallel
+ceph_test_rados_delete_pools_parallel_LDADD = libsystest.la librados.la
+bin_DEBUGPROGRAMS += ceph_test_rados_delete_pools_parallel
-testrados_watch_notify_SOURCES = \
+ceph_test_rados_watch_notify_SOURCES = \
test/system/rados_watch_notify.cc \
test/system/st_rados_create_pool.cc \
test/system/st_rados_delete_pool.cc \
test/system/st_rados_delete_objs.cc \
test/system/st_rados_watch.cc \
test/system/st_rados_notify.cc
-testrados_watch_notify_LDADD = libsystest.la librados.la
-bin_DEBUGPROGRAMS += testrados_watch_notify
+ceph_test_rados_watch_notify_LDADD = libsystest.la librados.la
+bin_DEBUGPROGRAMS += ceph_test_rados_watch_notify
-bench_log_SOURCES = \
+ceph_bench_log_SOURCES = \
test/bench_log.cc
-bench_log_LDADD = libcommon.la libglobal.la $(PTHREAD_LIBS) -lm $(CRYPTO_LIBS) $(EXTRALIBS)
-bin_DEBUGPROGRAMS += bench_log
+ceph_bench_log_LDADD = libcommon.la libglobal.la $(PTHREAD_LIBS) -lm $(CRYPTO_LIBS) $(EXTRALIBS)
+bin_DEBUGPROGRAMS += ceph_bench_log
## unit tests
@@ -671,6 +672,12 @@ unittest_log_LDADD = libcommon.la ${UNITTEST_LDADD}
unittest_log_CXXFLAGS = ${AM_CXXFLAGS} ${UNITTEST_CXXFLAGS} -O2
check_PROGRAMS += unittest_log
+unittest_throttle_SOURCES = test/common/Throttle.cc
+unittest_throttle_LDFLAGS = $(PTHREAD_CFLAGS) ${AM_LDFLAGS}
+unittest_throttle_LDADD = libcommon.la ${LIBGLOBAL_LDA} ${UNITTEST_LDADD}
+unittest_throttle_CXXFLAGS = ${AM_CXXFLAGS} ${UNITTEST_CXXFLAGS} -O2
+check_PROGRAMS += unittest_throttle
+
unittest_base64_SOURCES = test/base64.cc
unittest_base64_LDFLAGS = $(PTHREAD_CFLAGS) ${AM_LDFLAGS}
unittest_base64_LDADD = libcephfs.la -lm ${UNITTEST_LDADD}
@@ -761,6 +768,12 @@ unittest_escape_LDADD = ${UNITTEST_LDADD} $(LIBGLOBAL_LDA)
unittest_escape_CXXFLAGS = ${AM_CXXFLAGS} ${UNITTEST_CXXFLAGS}
check_PROGRAMS += unittest_escape
+unittest_chain_xattr_SOURCES = test/filestore/chain_xattr.cc
+unittest_chain_xattr_LDFLAGS = ${AM_LDFLAGS}
+unittest_chain_xattr_LDADD = ${UNITTEST_STATIC_LDADD} $(LIBOS_LDA) $(LIBGLOBAL_LDA)
+unittest_chain_xattr_CXXFLAGS = ${AM_CXXFLAGS} ${UNITTEST_CXXFLAGS} $(LEVELDB_INCLUDE) ${CRYPTO_CXXFLAGS}
+check_PROGRAMS += unittest_chain_xattr
+
unittest_strtol_SOURCES = test/strtol.cc
unittest_strtol_LDFLAGS = $(PTHREAD_CFLAGS) ${AM_LDFLAGS}
unittest_strtol_LDADD = ${UNITTEST_LDADD} $(LIBGLOBAL_LDA)
@@ -834,193 +847,193 @@ unittest_texttable_LDADD = librados.la ${UNITTEST_LDADD}
unittest_texttable_CXXFLAGS = ${AM_CXXFLAGS} ${UNITTEST_CXXFLAGS}
check_PROGRAMS += unittest_texttable
-test_librbd_SOURCES = test/librbd/test_librbd.cc test/librados/test.cc
-test_librbd_LDADD = librbd.la librados.la ${UNITTEST_STATIC_LDADD}
-test_librbd_CXXFLAGS = ${AM_CXXFLAGS} ${UNITTEST_CXXFLAGS}
-bin_DEBUGPROGRAMS += test_librbd
+ceph_test_librbd_SOURCES = test/librbd/test_librbd.cc test/librados/test.cc
+ceph_test_librbd_LDADD = librbd.la librados.la ${UNITTEST_STATIC_LDADD}
+ceph_test_librbd_CXXFLAGS = ${AM_CXXFLAGS} ${UNITTEST_CXXFLAGS}
+bin_DEBUGPROGRAMS += ceph_test_librbd
-test_librbd_fsx_SOURCES = test/librbd/fsx.c
-test_librbd_fsx_LDADD = librbd.la librados.la -lm
-test_librbd_fsx_CFLAGS = ${AM_CFLAGS} -Wno-format
-bin_DEBUGPROGRAMS += test_librbd_fsx
+ceph_test_librbd_fsx_SOURCES = test/librbd/fsx.c
+ceph_test_librbd_fsx_LDADD = librbd.la librados.la -lm
+ceph_test_librbd_fsx_CFLAGS = ${AM_CFLAGS} -Wno-format
+bin_DEBUGPROGRAMS += ceph_test_librbd_fsx
-test_cls_rbd_SOURCES = test/cls_rbd/test_cls_rbd.cc \
+ceph_test_cls_rbd_SOURCES = test/cls_rbd/test_cls_rbd.cc \
test/librados/test.cc \
cls/rbd/cls_rbd_client.cc \
cls/lock/cls_lock_client.cc \
cls/lock/cls_lock_types.cc \
cls/lock/cls_lock_ops.cc
-test_cls_rbd_LDADD = librados.la ${UNITTEST_STATIC_LDADD}
-test_cls_rbd_CXXFLAGS = ${AM_CXXFLAGS} ${UNITTEST_CXXFLAGS}
-bin_DEBUGPROGRAMS += test_cls_rbd
+ceph_test_cls_rbd_LDADD = librados.la ${UNITTEST_STATIC_LDADD}
+ceph_test_cls_rbd_CXXFLAGS = ${AM_CXXFLAGS} ${UNITTEST_CXXFLAGS}
+bin_DEBUGPROGRAMS += ceph_test_cls_rbd
-test_cls_refcount_SOURCES = test/cls_refcount/test_cls_refcount.cc \
+ceph_test_cls_refcount_SOURCES = test/cls_refcount/test_cls_refcount.cc \
test/librados/test.cc
-test_cls_refcount_LDADD = librados.la libcls_refcount_client.a ${UNITTEST_STATIC_LDADD}
-test_cls_refcount_CXXFLAGS = ${AM_CXXFLAGS} ${UNITTEST_CXXFLAGS}
-bin_DEBUGPROGRAMS += test_cls_refcount
+ceph_test_cls_refcount_LDADD = librados.la libcls_refcount_client.a ${UNITTEST_STATIC_LDADD}
+ceph_test_cls_refcount_CXXFLAGS = ${AM_CXXFLAGS} ${UNITTEST_CXXFLAGS}
+bin_DEBUGPROGRAMS += ceph_test_cls_refcount
-test_cls_lock_SOURCES = test/cls_lock/test_cls_lock.cc test/librados/test.cc
-test_cls_lock_LDFLAGS = ${AM_LDFLAGS}
-test_cls_lock_LDADD = libcls_lock_client.a librados.la ${UNITTEST_STATIC_LDADD}
-test_cls_lock_CXXFLAGS = ${AM_CXXFLAGS} ${UNITTEST_CXXFLAGS}
-bin_DEBUGPROGRAMS += test_cls_lock
+ceph_test_cls_lock_SOURCES = test/cls_lock/test_cls_lock.cc test/librados/test.cc
+ceph_test_cls_lock_LDFLAGS = ${AM_LDFLAGS}
+ceph_test_cls_lock_LDADD = libcls_lock_client.a librados.la ${UNITTEST_STATIC_LDADD}
+ceph_test_cls_lock_CXXFLAGS = ${AM_CXXFLAGS} ${UNITTEST_CXXFLAGS}
+bin_DEBUGPROGRAMS += ceph_test_cls_lock
if WITH_RADOSGW
-test_cls_rgw_SOURCES = test/cls_rgw/test_cls_rgw.cc \
+ceph_test_cls_rgw_SOURCES = test/cls_rgw/test_cls_rgw.cc \
test/librados/test.cc
-test_cls_rgw_LDADD = librados.la libcls_rgw_client.a ${UNITTEST_STATIC_LDADD}
-test_cls_rgw_CXXFLAGS = ${AM_CXXFLAGS} ${UNITTEST_CXXFLAGS}
-bin_DEBUGPROGRAMS += test_cls_rgw
+ceph_test_cls_rgw_LDADD = librados.la libcls_rgw_client.a ${UNITTEST_STATIC_LDADD}
+ceph_test_cls_rgw_CXXFLAGS = ${AM_CXXFLAGS} ${UNITTEST_CXXFLAGS}
+bin_DEBUGPROGRAMS += ceph_test_cls_rgw
endif
-test_mon_workloadgen_SOURCES = \
+ceph_test_mon_workloadgen_SOURCES = \
test/mon/test_mon_workloadgen.cc \
osdc/Objecter.cc \
osdc/Striper.cc
-test_mon_workloadgen_LDADD = $(LIBOS_LDA) $(LIBGLOBAL_LDA)
-test_mon_workloadgen_CXXFLAGS = ${CRYPTO_CXXFLAGS} ${AM_CXXFLAGS}
-bin_DEBUGPROGRAMS += test_mon_workloadgen
-
-test_rados_api_io_SOURCES = test/librados/io.cc test/librados/test.cc
-test_rados_api_io_LDFLAGS = ${AM_LDFLAGS}
-test_rados_api_io_LDADD = librados.la ${UNITTEST_STATIC_LDADD}
-test_rados_api_io_CXXFLAGS = ${AM_CXXFLAGS} ${UNITTEST_CXXFLAGS}
-bin_DEBUGPROGRAMS += test_rados_api_io
-
-test_rados_api_aio_SOURCES = test/librados/aio.cc test/librados/test.cc
-test_rados_api_aio_LDFLAGS = ${AM_LDFLAGS}
-test_rados_api_aio_LDADD = librados.la ${UNITTEST_STATIC_LDADD}
-test_rados_api_aio_CXXFLAGS = ${AM_CXXFLAGS} ${UNITTEST_CXXFLAGS}
-bin_DEBUGPROGRAMS += test_rados_api_aio
-
-test_rados_api_list_SOURCES = test/librados/list.cc test/librados/test.cc
-test_rados_api_list_LDFLAGS = ${AM_LDFLAGS}
-test_rados_api_list_LDADD = librados.la ${UNITTEST_STATIC_LDADD}
-test_rados_api_list_CXXFLAGS = ${AM_CXXFLAGS} ${UNITTEST_CXXFLAGS}
-bin_DEBUGPROGRAMS += test_rados_api_list
-
-test_rados_api_pool_SOURCES = test/librados/pool.cc test/librados/test.cc
-test_rados_api_pool_LDFLAGS = ${AM_LDFLAGS}
-test_rados_api_pool_LDADD = librados.la ${UNITTEST_STATIC_LDADD}
-test_rados_api_pool_CXXFLAGS = ${AM_CXXFLAGS} ${UNITTEST_CXXFLAGS}
-bin_DEBUGPROGRAMS += test_rados_api_pool
-
-test_rados_api_stat_SOURCES = test/librados/stat.cc test/librados/test.cc
-test_rados_api_stat_LDFLAGS = ${AM_LDFLAGS}
-test_rados_api_stat_LDADD = librados.la ${UNITTEST_STATIC_LDADD}
-test_rados_api_stat_CXXFLAGS = ${AM_CXXFLAGS} ${UNITTEST_CXXFLAGS}
-bin_DEBUGPROGRAMS += test_rados_api_stat
-
-test_rados_api_watch_notify_SOURCES = test/librados/watch_notify.cc test/librados/test.cc
-test_rados_api_watch_notify_LDFLAGS = ${AM_LDFLAGS}
-test_rados_api_watch_notify_LDADD = librados.la ${UNITTEST_STATIC_LDADD}
-test_rados_api_watch_notify_CXXFLAGS = ${AM_CXXFLAGS} ${UNITTEST_CXXFLAGS}
-bin_DEBUGPROGRAMS += test_rados_api_watch_notify
-
-test_rados_api_snapshots_SOURCES = test/librados/snapshots.cc test/librados/test.cc
-test_rados_api_snapshots_LDFLAGS = ${AM_LDFLAGS}
-test_rados_api_snapshots_LDADD = librados.la ${UNITTEST_STATIC_LDADD}
-test_rados_api_snapshots_CXXFLAGS = ${AM_CXXFLAGS} ${UNITTEST_CXXFLAGS}
-bin_DEBUGPROGRAMS += test_rados_api_snapshots
-
-test_rados_api_cls_SOURCES = test/librados/cls.cc test/librados/test.cc
-test_rados_api_cls_LDFLAGS = ${AM_LDFLAGS}
-test_rados_api_cls_LDADD = librados.la ${UNITTEST_STATIC_LDADD}
-test_rados_api_cls_CXXFLAGS = ${AM_CXXFLAGS} ${UNITTEST_CXXFLAGS}
-bin_DEBUGPROGRAMS += test_rados_api_cls
-
-test_rados_api_misc_SOURCES = test/librados/misc.cc test/librados/test.cc
-test_rados_api_misc_LDFLAGS = ${AM_LDFLAGS}
-test_rados_api_misc_LDADD = librados.la ${UNITTEST_STATIC_LDADD}
-test_rados_api_misc_CXXFLAGS = ${AM_CXXFLAGS} ${UNITTEST_CXXFLAGS}
-bin_DEBUGPROGRAMS += test_rados_api_misc
-
-test_libcephfs_SOURCES = test/libcephfs/test.cc test/libcephfs/readdir_r_cb.cc test/libcephfs/caps.cc
-test_libcephfs_LDFLAGS = $(PTHREAD_CFLAGS) ${AM_LDFLAGS}
-test_libcephfs_LDADD = ${UNITTEST_STATIC_LDADD} libcephfs.la
-test_libcephfs_CXXFLAGS = $(AM_CXXFLAGS) ${UNITTEST_CXXFLAGS}
-bin_DEBUGPROGRAMS += test_libcephfs
-
-test_filestore_SOURCES = test/filestore/store_test.cc
-test_filestore_LDFLAGS = ${AM_LDFLAGS}
-test_filestore_LDADD = ${UNITTEST_STATIC_LDADD} $(LIBOS_LDA) $(LIBGLOBAL_LDA)
-test_filestore_CXXFLAGS = ${AM_CXXFLAGS} ${UNITTEST_CXXFLAGS} $(LEVELDB_INCLUDE) ${CRYPTO_CXXFLAGS}
-bin_DEBUGPROGRAMS += test_filestore
-
-test_filestore_workloadgen_SOURCES = \
+ceph_test_mon_workloadgen_LDADD = $(LIBOS_LDA) $(LIBGLOBAL_LDA)
+ceph_test_mon_workloadgen_CXXFLAGS = ${CRYPTO_CXXFLAGS} ${AM_CXXFLAGS}
+bin_DEBUGPROGRAMS += ceph_test_mon_workloadgen
+
+ceph_test_rados_api_io_SOURCES = test/librados/io.cc test/librados/test.cc
+ceph_test_rados_api_io_LDFLAGS = ${AM_LDFLAGS}
+ceph_test_rados_api_io_LDADD = librados.la ${UNITTEST_STATIC_LDADD}
+ceph_test_rados_api_io_CXXFLAGS = ${AM_CXXFLAGS} ${UNITTEST_CXXFLAGS}
+bin_DEBUGPROGRAMS += ceph_test_rados_api_io
+
+ceph_test_rados_api_aio_SOURCES = test/librados/aio.cc test/librados/test.cc
+ceph_test_rados_api_aio_LDFLAGS = ${AM_LDFLAGS}
+ceph_test_rados_api_aio_LDADD = librados.la ${UNITTEST_STATIC_LDADD}
+ceph_test_rados_api_aio_CXXFLAGS = ${AM_CXXFLAGS} ${UNITTEST_CXXFLAGS}
+bin_DEBUGPROGRAMS += ceph_test_rados_api_aio
+
+ceph_test_rados_api_list_SOURCES = test/librados/list.cc test/librados/test.cc
+ceph_test_rados_api_list_LDFLAGS = ${AM_LDFLAGS}
+ceph_test_rados_api_list_LDADD = librados.la ${UNITTEST_STATIC_LDADD}
+ceph_test_rados_api_list_CXXFLAGS = ${AM_CXXFLAGS} ${UNITTEST_CXXFLAGS}
+bin_DEBUGPROGRAMS += ceph_test_rados_api_list
+
+ceph_test_rados_api_pool_SOURCES = test/librados/pool.cc test/librados/test.cc
+ceph_test_rados_api_pool_LDFLAGS = ${AM_LDFLAGS}
+ceph_test_rados_api_pool_LDADD = librados.la ${UNITTEST_STATIC_LDADD}
+ceph_test_rados_api_pool_CXXFLAGS = ${AM_CXXFLAGS} ${UNITTEST_CXXFLAGS}
+bin_DEBUGPROGRAMS += ceph_test_rados_api_pool
+
+ceph_test_rados_api_stat_SOURCES = test/librados/stat.cc test/librados/test.cc
+ceph_test_rados_api_stat_LDFLAGS = ${AM_LDFLAGS}
+ceph_test_rados_api_stat_LDADD = librados.la ${UNITTEST_STATIC_LDADD}
+ceph_test_rados_api_stat_CXXFLAGS = ${AM_CXXFLAGS} ${UNITTEST_CXXFLAGS}
+bin_DEBUGPROGRAMS += ceph_test_rados_api_stat
+
+ceph_test_rados_api_watch_notify_SOURCES = test/librados/watch_notify.cc test/librados/test.cc
+ceph_test_rados_api_watch_notify_LDFLAGS = ${AM_LDFLAGS}
+ceph_test_rados_api_watch_notify_LDADD = librados.la ${UNITTEST_STATIC_LDADD}
+ceph_test_rados_api_watch_notify_CXXFLAGS = ${AM_CXXFLAGS} ${UNITTEST_CXXFLAGS}
+bin_DEBUGPROGRAMS += ceph_test_rados_api_watch_notify
+
+ceph_test_rados_api_snapshots_SOURCES = test/librados/snapshots.cc test/librados/test.cc
+ceph_test_rados_api_snapshots_LDFLAGS = ${AM_LDFLAGS}
+ceph_test_rados_api_snapshots_LDADD = librados.la ${UNITTEST_STATIC_LDADD}
+ceph_test_rados_api_snapshots_CXXFLAGS = ${AM_CXXFLAGS} ${UNITTEST_CXXFLAGS}
+bin_DEBUGPROGRAMS += ceph_test_rados_api_snapshots
+
+ceph_test_rados_api_cls_SOURCES = test/librados/cls.cc test/librados/test.cc
+ceph_test_rados_api_cls_LDFLAGS = ${AM_LDFLAGS}
+ceph_test_rados_api_cls_LDADD = librados.la ${UNITTEST_STATIC_LDADD}
+ceph_test_rados_api_cls_CXXFLAGS = ${AM_CXXFLAGS} ${UNITTEST_CXXFLAGS}
+bin_DEBUGPROGRAMS += ceph_test_rados_api_cls
+
+ceph_test_rados_api_misc_SOURCES = test/librados/misc.cc test/librados/test.cc
+ceph_test_rados_api_misc_LDFLAGS = ${AM_LDFLAGS}
+ceph_test_rados_api_misc_LDADD = librados.la ${UNITTEST_STATIC_LDADD}
+ceph_test_rados_api_misc_CXXFLAGS = ${AM_CXXFLAGS} ${UNITTEST_CXXFLAGS}
+bin_DEBUGPROGRAMS += ceph_test_rados_api_misc
+
+ceph_test_libcephfs_SOURCES = test/libcephfs/test.cc test/libcephfs/readdir_r_cb.cc test/libcephfs/caps.cc
+ceph_test_libcephfs_LDFLAGS = $(PTHREAD_CFLAGS) ${AM_LDFLAGS}
+ceph_test_libcephfs_LDADD = ${UNITTEST_STATIC_LDADD} libcephfs.la
+ceph_test_libcephfs_CXXFLAGS = $(AM_CXXFLAGS) ${UNITTEST_CXXFLAGS}
+bin_DEBUGPROGRAMS += ceph_test_libcephfs
+
+ceph_test_filestore_SOURCES = test/filestore/store_test.cc
+ceph_test_filestore_LDFLAGS = ${AM_LDFLAGS}
+ceph_test_filestore_LDADD = ${UNITTEST_STATIC_LDADD} $(LIBOS_LDA) $(LIBGLOBAL_LDA)
+ceph_test_filestore_CXXFLAGS = ${AM_CXXFLAGS} ${UNITTEST_CXXFLAGS} $(LEVELDB_INCLUDE) ${CRYPTO_CXXFLAGS}
+bin_DEBUGPROGRAMS += ceph_test_filestore
+
+ceph_test_filestore_workloadgen_SOURCES = \
test/filestore/workload_generator.cc \
test/filestore/TestFileStoreState.cc
-test_filestore_workloadgen_LDFLAGS = ${AM_LDFLAGS}
-test_filestore_workloadgen_LDADD = $(LIBOS_LDA) $(LIBGLOBAL_LDA)
-test_filestore_workloadgen_CXXFLAGS = ${CRYPTO_CXXFLAGS} ${AM_CXXFLAGS}
-bin_DEBUGPROGRAMS += test_filestore_workloadgen
+ceph_test_filestore_workloadgen_LDFLAGS = ${AM_LDFLAGS}
+ceph_test_filestore_workloadgen_LDADD = $(LIBOS_LDA) $(LIBGLOBAL_LDA)
+ceph_test_filestore_workloadgen_CXXFLAGS = ${CRYPTO_CXXFLAGS} ${AM_CXXFLAGS}
+bin_DEBUGPROGRAMS += ceph_test_filestore_workloadgen
-test_filestore_idempotent_SOURCES = test/filestore/test_idempotent.cc test/filestore/FileStoreTracker.cc test/common/ObjectContents.cc
-test_filestore_idempotent_LDADD = $(LIBOS_LDA) $(LIBGLOBAL_LDA)
-test_filestore_idempotent_CXXFLAGS = $(AM_CXXFLAGS) $(LEVELDB_INCLUDE)
-bin_DEBUGPROGRAMS += test_filestore_idempotent
+ceph_test_filestore_idempotent_SOURCES = test/filestore/test_idempotent.cc test/filestore/FileStoreTracker.cc test/common/ObjectContents.cc
+ceph_test_filestore_idempotent_LDADD = $(LIBOS_LDA) $(LIBGLOBAL_LDA)
+ceph_test_filestore_idempotent_CXXFLAGS = $(AM_CXXFLAGS) $(LEVELDB_INCLUDE)
+bin_DEBUGPROGRAMS += ceph_test_filestore_idempotent
-test_filestore_idempotent_sequence_SOURCES = \
+ceph_test_filestore_idempotent_sequence_SOURCES = \
test/filestore/test_idempotent_sequence.cc \
test/filestore/DeterministicOpSequence.cc \
test/filestore/TestFileStoreState.cc \
test/filestore/FileStoreDiff.cc
-test_filestore_idempotent_sequence_CXXFLAGS = ${CRYPTO_CXXFLAGS} ${AM_CXXFLAGS}
-test_filestore_idempotent_sequence_LDADD = $(LIBOS_LDA) $(LIBGLOBAL_LDA)
-bin_DEBUGPROGRAMS += test_filestore_idempotent_sequence
-
-xattr_bench_SOURCES = test/xattr_bench.cc
-xattr_bench_LDFLAGS = ${AM_LDFLAGS}
-xattr_bench_LDADD = ${UNITTEST_STATIC_LDADD} $(LIBOS_LDA) $(LIBGLOBAL_LDA)
-xattr_bench_CXXFLAGS = ${AM_CXXFLAGS} ${UNITTEST_CXXFLAGS} $(LEVELDB_INCLUDE) ${CRYPTO_CXXFLAGS}
-bin_DEBUGPROGRAMS += xattr_bench
-
-test_filejournal_SOURCES = test/test_filejournal.cc
-test_filejournal_LDFLAGS = ${AM_LDFLAGS}
-test_filejournal_LDADD = ${UNITTEST_STATIC_LDADD} $(LIBOS_LDA) $(LIBGLOBAL_LDA)
-test_filejournal_CXXFLAGS = ${AM_CXXFLAGS} ${UNITTEST_CXXFLAGS}
-bin_DEBUGPROGRAMS += test_filejournal
-
-test_stress_watch_SOURCES = test/test_stress_watch.cc test/librados/test.cc
-test_stress_watch_LDFLAGS = ${AM_LDFLAGS}
-test_stress_watch_LDADD = librados.la ${UNITTEST_STATIC_LDADD}
-test_stress_watch_CXXFLAGS = ${AM_CXXFLAGS} ${UNITTEST_CXXFLAGS}
-bin_DEBUGPROGRAMS += test_stress_watch
-
-test_objectcacher_stress_SOURCES = test/osdc/object_cacher_stress.cc test/osdc/FakeWriteback.cc osdc/ObjectCacher.cc
-test_objectcacher_stress_LDFLAGS = ${AM_LDFLAGS}
-test_objectcacher_stress_LDADD = $(LIBGLOBAL_LDA)
-test_objectcacher_stress_CXXFLAGS = ${AM_CXXFLAGS}
-bin_DEBUGPROGRAMS += test_objectcacher_stress
-
-test_object_map_SOURCES = test/ObjectMap/test_object_map.cc test/ObjectMap/KeyValueDBMemory.cc os/DBObjectMap.cc os/LevelDBStore.cc
-test_object_map_LDFLAGS = ${AM_LDFLAGS}
-test_object_map_LDADD = ${UNITTEST_STATIC_LDADD} $(LIBOS_LDA) $(LIBGLOBAL_LDA)
-test_object_map_CXXFLAGS = ${AM_CXXFLAGS} ${UNITTEST_CXXFLAGS} $(LEVELDB_INCLUDE) ${CRYPTO_CXXFLAGS}
-bin_DEBUGPROGRAMS += test_object_map
-
-test_keyvaluedb_atomicity_SOURCES = test/ObjectMap/test_keyvaluedb_atomicity.cc os/LevelDBStore.cc
-test_keyvaluedb_atomicity_LDFLAGS = ${AM_LDFLAGS}
-test_keyvaluedb_atomicity_LDADD = ${UNITTEST_STATIC_LDADD} $(LIBOS_LDA) $(LIBGLOBAL_LDA)
-test_keyvaluedb_atomicity_CXXFLAGS = ${AM_CXXFLAGS} ${UNITTEST_CXXFLAGS} $(LEVELDB_INCLUDE) ${CRYPTO_CXXFLAGS}
-bin_DEBUGPROGRAMS += test_keyvaluedb_atomicity
-
-test_keyvaluedb_iterators_SOURCES = test/ObjectMap/test_keyvaluedb_iterators.cc \
+ceph_test_filestore_idempotent_sequence_CXXFLAGS = ${CRYPTO_CXXFLAGS} ${AM_CXXFLAGS}
+ceph_test_filestore_idempotent_sequence_LDADD = $(LIBOS_LDA) $(LIBGLOBAL_LDA)
+bin_DEBUGPROGRAMS += ceph_test_filestore_idempotent_sequence
+
+ceph_xattr_bench_SOURCES = test/xattr_bench.cc
+ceph_xattr_bench_LDFLAGS = ${AM_LDFLAGS}
+ceph_xattr_bench_LDADD = ${UNITTEST_STATIC_LDADD} $(LIBOS_LDA) $(LIBGLOBAL_LDA)
+ceph_xattr_bench_CXXFLAGS = ${AM_CXXFLAGS} ${UNITTEST_CXXFLAGS} $(LEVELDB_INCLUDE) ${CRYPTO_CXXFLAGS}
+bin_DEBUGPROGRAMS += ceph_xattr_bench
+
+ceph_test_filejournal_SOURCES = test/test_filejournal.cc
+ceph_test_filejournal_LDFLAGS = ${AM_LDFLAGS}
+ceph_test_filejournal_LDADD = ${UNITTEST_STATIC_LDADD} $(LIBOS_LDA) $(LIBGLOBAL_LDA)
+ceph_test_filejournal_CXXFLAGS = ${AM_CXXFLAGS} ${UNITTEST_CXXFLAGS}
+bin_DEBUGPROGRAMS += ceph_test_filejournal
+
+ceph_test_stress_watch_SOURCES = test/test_stress_watch.cc test/librados/test.cc
+ceph_test_stress_watch_LDFLAGS = ${AM_LDFLAGS}
+ceph_test_stress_watch_LDADD = librados.la ${UNITTEST_STATIC_LDADD}
+ceph_test_stress_watch_CXXFLAGS = ${AM_CXXFLAGS} ${UNITTEST_CXXFLAGS}
+bin_DEBUGPROGRAMS += ceph_test_stress_watch
+
+ceph_test_objectcacher_stress_SOURCES = test/osdc/object_cacher_stress.cc test/osdc/FakeWriteback.cc osdc/ObjectCacher.cc
+ceph_test_objectcacher_stress_LDFLAGS = ${AM_LDFLAGS}
+ceph_test_objectcacher_stress_LDADD = $(LIBGLOBAL_LDA)
+ceph_test_objectcacher_stress_CXXFLAGS = ${AM_CXXFLAGS}
+bin_DEBUGPROGRAMS += ceph_test_objectcacher_stress
+
+ceph_test_object_map_SOURCES = test/ObjectMap/test_object_map.cc test/ObjectMap/KeyValueDBMemory.cc os/DBObjectMap.cc os/LevelDBStore.cc
+ceph_test_object_map_LDFLAGS = ${AM_LDFLAGS}
+ceph_test_object_map_LDADD = ${UNITTEST_STATIC_LDADD} $(LIBOS_LDA) $(LIBGLOBAL_LDA)
+ceph_test_object_map_CXXFLAGS = ${AM_CXXFLAGS} ${UNITTEST_CXXFLAGS} $(LEVELDB_INCLUDE) ${CRYPTO_CXXFLAGS}
+bin_DEBUGPROGRAMS += ceph_test_object_map
+
+ceph_test_keyvaluedb_atomicity_SOURCES = test/ObjectMap/test_keyvaluedb_atomicity.cc os/LevelDBStore.cc
+ceph_test_keyvaluedb_atomicity_LDFLAGS = ${AM_LDFLAGS}
+ceph_test_keyvaluedb_atomicity_LDADD = ${UNITTEST_STATIC_LDADD} $(LIBOS_LDA) $(LIBGLOBAL_LDA)
+ceph_test_keyvaluedb_atomicity_CXXFLAGS = ${AM_CXXFLAGS} ${UNITTEST_CXXFLAGS} $(LEVELDB_INCLUDE) ${CRYPTO_CXXFLAGS}
+bin_DEBUGPROGRAMS += ceph_test_keyvaluedb_atomicity
+
+ceph_test_keyvaluedb_iterators_SOURCES = test/ObjectMap/test_keyvaluedb_iterators.cc \
test/ObjectMap/KeyValueDBMemory.cc \
os/LevelDBStore.cc
-test_keyvaluedb_iterators_LDFLAGS = ${AM_LDFLAGS}
-test_keyvaluedb_iterators_LDADD = ${UNITTEST_STATIC_LDADD} $(LIBOS_LDA) $(LIBGLOBAL_LDA)
-test_keyvaluedb_iterators_CXXFLAGS = ${AM_CXXFLAGS} ${UNITTEST_CXXFLAGS} $(LEVELDB_INCLUDE) ${CRYPTO_CXXFLAGS}
-bin_DEBUGPROGRAMS += test_keyvaluedb_iterators
+ceph_test_keyvaluedb_iterators_LDFLAGS = ${AM_LDFLAGS}
+ceph_test_keyvaluedb_iterators_LDADD = ${UNITTEST_STATIC_LDADD} $(LIBOS_LDA) $(LIBGLOBAL_LDA)
+ceph_test_keyvaluedb_iterators_CXXFLAGS = ${AM_CXXFLAGS} ${UNITTEST_CXXFLAGS} $(LEVELDB_INCLUDE) ${CRYPTO_CXXFLAGS}
+bin_DEBUGPROGRAMS += ceph_test_keyvaluedb_iterators
-test_cfuse_cache_invalidate_SOURCES = test/test_cfuse_cache_invalidate.cc
-test_cfuse_cache_invalidate_LDFLAGS = ${AM_LDFLAGS}
-test_cfuse_cache_invalidate_LDADD =
-test_cfuse_cache_invalidate_CXXFLAGS = ${AM_CXXFLAGS}
-bin_DEBUGPROGRAMS += test_cfuse_cache_invalidate
+ceph_test_cfuse_cache_invalidate_SOURCES = test/test_cfuse_cache_invalidate.cc
+ceph_test_cfuse_cache_invalidate_LDFLAGS = ${AM_LDFLAGS}
+ceph_test_cfuse_cache_invalidate_LDADD =
+ceph_test_cfuse_cache_invalidate_CXXFLAGS = ${AM_CXXFLAGS}
+bin_DEBUGPROGRAMS += ceph_test_cfuse_cache_invalidate
# shell scripts
editpaths = sed \
@@ -1109,7 +1122,6 @@ EXTRA_DIST += \
$(srcdir)/upstart/ceph-osd.conf \
$(srcdir)/upstart/ceph-osd-all.conf \
$(srcdir)/upstart/ceph-osd-all-starter.conf \
- $(srcdir)/upstart/ceph-hotplug.conf \
$(srcdir)/upstart/ceph-mds.conf \
$(srcdir)/upstart/ceph-mds-all.conf \
$(srcdir)/upstart/ceph-mds-all-starter.conf \
@@ -1223,6 +1235,7 @@ libcommon_files = \
auth/Crypto.cc \
auth/KeyRing.cc \
auth/RotatingKeyRing.cc \
+ common/DecayCounter.cc \
common/LogClient.cc \
common/LogEntry.cc \
common/PrebufferedStreambuf.cc \
@@ -1233,7 +1246,6 @@ libcommon_files = \
common/admin_socket.cc \
common/admin_socket_client.cc \
common/escape.c \
- common/types.cc \
common/Clock.cc \
common/Throttle.cc \
common/Timer.cc \
@@ -1272,6 +1284,8 @@ libcommon_files = \
osd/OSDMap.cc \
osd/osd_types.cc \
mds/MDSMap.cc \
+ mds/inode_backtrace.cc \
+ mds/mdstypes.cc \
common/blkdev.cc \
common/common_init.cc \
common/pipe.c \
@@ -1335,6 +1349,8 @@ libmon_a_CXXFLAGS= ${AM_CXXFLAGS}
noinst_LIBRARIES += libmon.a
libmds_a_SOURCES = \
+ mds/Anchor.cc \
+ mds/Capability.cc \
mds/Dumper.cc \
mds/Resetter.cc \
mds/MDS.cc \
@@ -1357,6 +1373,7 @@ libmds_a_SOURCES = \
mds/MDSTableServer.cc \
mds/AnchorServer.cc \
mds/AnchorClient.cc \
+ mds/SnapRealm.cc \
mds/SnapServer.cc \
mds/snap.cc \
mds/SessionMap.cc \
@@ -1405,7 +1422,7 @@ libclient_la_SOURCES = \
client/Inode.cc \
client/Dentry.cc \
client/MetaRequest.cc \
- client/SnapRealm.cc \
+ client/ClientSnapRealm.cc \
client/MetaSession.cc \
client/Trace.cc
libclient_la_LIBADD = libosdc.la $(LIBEDIT_LIBS)
@@ -1457,7 +1474,7 @@ noinst_HEADERS = \
client/Inode.h\
client/MetaRequest.h\
client/MetaSession.h\
- client/SnapRealm.h\
+ client/ClientSnapRealm.h\
client/SyntheticClient.h\
client/Trace.h\
client/fuse_ll.h\
@@ -1683,6 +1700,7 @@ noinst_HEADERS = \
mds/SessionMap.h\
mds/SimpleLock.h\
mds/SnapClient.h\
+ mds/SnapRealm.h\
mds/SnapServer.h\
mds/events/ECommitted.h\
mds/events/EExport.h\
@@ -1695,7 +1713,6 @@ noinst_HEADERS = \
mds/events/ESession.h\
mds/events/ESessions.h\
mds/events/ESlaveUpdate.h\
- mds/events/EString.h\
mds/events/ESubtreeMap.h\
mds/events/ETableClient.h\
mds/events/ETableServer.h\
@@ -1865,6 +1882,7 @@ noinst_HEADERS = \
osdc/WritebackHandler.h\
perfglue/cpu_profiler.h\
perfglue/heap_profiler.h\
+ rgw/logrotate.conf\
rgw/rgw_acl.h\
rgw/rgw_acl_s3.h\
rgw/rgw_acl_swift.h\
diff --git a/src/auth/cephx/CephxProtocol.h b/src/auth/cephx/CephxProtocol.h
index 38e0616b501..9386a410f95 100644
--- a/src/auth/cephx/CephxProtocol.h
+++ b/src/auth/cephx/CephxProtocol.h
@@ -466,7 +466,7 @@ void encode_encrypt_enc_bl(CephContext *cct, const T& t, const CryptoKey& key,
}
template <typename T>
-int decode_decrypt(CephContext *cct, T& t, const CryptoKey key,
+int decode_decrypt(CephContext *cct, T& t, const CryptoKey& key,
bufferlist::iterator& iter, std::string &error)
{
bufferlist bl_enc;
diff --git a/src/ceph-create-keys b/src/ceph-create-keys
index 438e51d3076..272bb3ec6ef 100755
--- a/src/ceph-create-keys
+++ b/src/ceph-create-keys
@@ -190,6 +190,7 @@ def main():
wait_for_quorum(cluster=args.cluster, mon_id=args.id)
get_key(cluster=args.cluster, mon_id=args.id)
+
bootstrap_key(
cluster=args.cluster,
type_='osd',
@@ -203,6 +204,17 @@ def main():
),
)
+ bootstrap_key(
+ cluster=args.cluster,
+ type_='mds',
+ caps=dict(
+ mon=[
+ r'allow command auth get-or-create * osd allow\ * mds allow mon allow\ rwx',
+ 'allow command mon getmap',
+ ],
+ ),
+ )
+
if __name__ == '__main__':
main()
diff --git a/src/ceph-disk-activate b/src/ceph-disk-activate
index f78ae17ce88..1eb696490e3 100755
--- a/src/ceph-disk-activate
+++ b/src/ceph-disk-activate
@@ -5,11 +5,19 @@ import errno
import logging
import os
import os.path
+import platform
import re
import subprocess
+import stat
import sys
import tempfile
+init_systems = [
+ 'upstart',
+ 'sysvinit',
+ 'systemd',
+ 'auto',
+ ]
log_name = __name__
if log_name == '__main__':
@@ -64,6 +72,10 @@ class UnmountError(ActivateError):
def maybe_mkdir(*a, **kw):
+ # remove any symlink, if it is there..
+ if os.path.exists(*a) and stat.S_ISLNK(os.lstat(*a).st_mode):
+ log.debug('Removing old symlink at %s', *a)
+ os.unlink(*a)
try:
os.mkdir(*a, **kw)
except OSError, e:
@@ -180,7 +192,7 @@ def allocate_osd_id(
try:
osd_id = _check_output(
args=[
- 'ceph',
+ '/usr/bin/ceph',
'--cluster', cluster,
'--name', 'client.bootstrap-osd',
'--keyring', keyring,
@@ -205,7 +217,7 @@ def mkfs(
monmap = os.path.join(path, 'activate.monmap')
subprocess.check_call(
args=[
- 'ceph',
+ '/usr/bin/ceph',
'--cluster', cluster,
'--name', 'client.bootstrap-osd',
'--keyring', keyring,
@@ -215,7 +227,7 @@ def mkfs(
subprocess.check_call(
args=[
- 'ceph-osd',
+ '/usr/bin/ceph-osd',
'--cluster', cluster,
'--mkfs',
'--mkkey',
@@ -239,7 +251,7 @@ def auth_key(
):
subprocess.check_call(
args=[
- 'ceph',
+ '/usr/bin/ceph',
'--cluster', cluster,
'--name', 'client.bootstrap-osd',
'--keyring', keyring,
@@ -265,7 +277,7 @@ def move_mount(
maybe_mkdir(osd_data)
subprocess.check_call(
args=[
- 'mount',
+ '/bin/mount',
'--move',
'--',
path,
@@ -274,35 +286,57 @@ def move_mount(
)
-def upstart_start(
+def start_daemon(
cluster,
osd_id,
):
- log.debug('Starting service...')
- subprocess.check_call(
- args=[
- 'initctl',
- # use emit, not start, because start would fail if the
- # instance was already running
- 'emit',
- # since the daemon starting doesn't guarantee much about
- # the service being operational anyway, don't bother
- # waiting for it
- '--no-wait',
- '--',
- 'ceph-osd',
- 'cluster={cluster}'.format(cluster=cluster),
- 'id={osd_id}'.format(osd_id=osd_id),
- ],
- )
+ log.debug('Starting %s osd.%s...', cluster, osd_id)
+ path = '/var/lib/ceph/osd/{cluster}-{osd_id}'.format(
+ cluster=cluster, osd_id=osd_id)
+
+ # upstart?
+ try:
+ if os.path.exists(os.path.join(path,'upstart')):
+ subprocess.check_call(
+ args=[
+ '/sbin/initctl',
+ # use emit, not start, because start would fail if the
+ # instance was already running
+ 'emit',
+ # since the daemon starting doesn't guarantee much about
+ # the service being operational anyway, don't bother
+ # waiting for it
+ '--no-wait',
+ '--',
+ 'ceph-osd',
+ 'cluster={cluster}'.format(cluster=cluster),
+ 'id={osd_id}'.format(osd_id=osd_id),
+ ],
+ )
+ elif os.path.exists(os.path.join(path, 'sysvinit')):
+ subprocess.check_call(
+ args=[
+ '/usr/sbin/service',
+ 'ceph',
+ 'start',
+ 'osd.{osd_id}'.format(osd_id=osd_id),
+ ],
+ )
+ else:
+ raise ActivateError('{cluster} osd.{osd_id} is not tagged with an init system'.format(
+ cluster=cluster,
+ osd_id=osd_id,
+ ))
+ except subprocess.CalledProcessError as e:
+ raise ActivateError('ceph osd start failed', e)
def detect_fstype(
dev,
):
fstype = _check_output(
args=[
- 'blkid',
+ '/sbin/blkid',
# we don't want stale cached results
'-p',
'-s', 'TYPE',
@@ -319,7 +353,7 @@ def get_conf(cluster, variable):
try:
p = subprocess.Popen(
args=[
- 'ceph-conf',
+ '/usr/bin/ceph-conf',
'--cluster={cluster}'.format(
cluster=cluster,
),
@@ -374,7 +408,7 @@ def mount(
try:
subprocess.check_call(
args=[
- 'mount',
+ '/bin/mount',
'-o', options,
'--',
dev,
@@ -397,7 +431,7 @@ def unmount(
try:
subprocess.check_call(
args=[
- 'umount',
+ '/bin/umount',
'--',
path,
],
@@ -405,34 +439,124 @@ def unmount(
except subprocess.CalledProcessError as e:
raise UnmountError(e)
-
-def activate(
- path,
+def mount_activate(
+ dev,
activate_key_template,
- do_mount,
+ init,
):
- if do_mount:
- try:
- fstype = detect_fstype(dev=path)
- except (subprocess.CalledProcessError,
- TruncatedLineError,
- TooManyLinesError) as e:
- raise FilesystemTypeError(
- 'device {dev}'.format(dev=path),
- e,
- )
+ try:
+ fstype = detect_fstype(dev=dev)
+ except (subprocess.CalledProcessError,
+ TruncatedLineError,
+ TooManyLinesError) as e:
+ raise FilesystemTypeError(
+ 'device {dev}'.format(dev=dev),
+ e,
+ )
+
+ # TODO always using mount options from cluster=ceph for
+ # now; see http://tracker.newdream.net/issues/3253
+ mount_options = get_conf(
+ cluster='ceph',
+ variable='osd_mount_options_{fstype}'.format(
+ fstype=fstype,
+ ),
+ )
+ if mount_options is None:
mount_options = get_conf(
- # TODO always using mount options from cluster=ceph for
- # now; see http://tracker.newdream.net/issues/3253
cluster='ceph',
variable='osd_fs_mount_options_{fstype}'.format(
fstype=fstype,
),
)
- path = mount(dev=path, fstype=fstype, options=mount_options)
+ #remove whitespaces from mount_options
+ if mount_options is not None:
+ mount_options = "".join(mount_options.split())
+
+ path = mount(dev=dev, fstype=fstype, options=mount_options)
+
+ osd_id = None
+ cluster = None
+ try:
+ (osd_id, cluster) = activate(path, activate_key_template, init)
+
+ # check if the disk is already active
+ active = False
+ src_dev = os.stat(path).st_dev
+ try:
+ dst_dev = os.stat('/var/lib/ceph/osd/{cluster}-{osd_id}'.format(
+ cluster=cluster,
+ osd_id=osd_id)).st_dev
+ if src_dev == dst_dev:
+ active = True
+ except:
+ pass
+ if active:
+ log.info('%s osd.%s already mounted in position; unmounting ours.' % (cluster, osd_id))
+ unmount(path)
+ else:
+ move_mount(
+ path=path,
+ cluster=cluster,
+ osd_id=osd_id,
+ )
+ return (cluster, osd_id)
+
+ except:
+ log.error('Failed to activate')
+ unmount(path)
+ raise
+ finally:
+ # remove out temp dir
+ os.rmdir(path)
+
+
+def activate_dir(
+    path,
+    activate_key_template,
+    init,
+    ):
+    """
+    Activate an OSD whose data lives in a plain directory.
+
+    Runs activate() on ``path`` and, when ``path`` is not already the
+    canonical /var/lib/ceph/osd/{cluster}-{osd_id} location, makes the
+    canonical location a symlink pointing at ``path``.  A stale symlink
+    pointing somewhere else is replaced; a correct one is left alone.
+
+    Returns (cluster, osd_id).
+
+    Raises ActivateError if ``path`` does not exist, if the canonical
+    location is occupied by something that is not a symlink, or if the
+    symlink cannot be removed or created.
+    """
+
+    if not os.path.exists(path):
+        raise ActivateError(
+            'directory %s does not exist' % path
+            )
+
+    # note the order: activate() hands back (osd_id, cluster), while we
+    # return (cluster, osd_id)
+    (osd_id, cluster) = activate(path, activate_key_template, init)
+    canonical = '/var/lib/ceph/osd/{cluster}-{osd_id}'.format(
+        cluster=cluster,
+        osd_id=osd_id)
+    if path != canonical:
+        # symlink it from the proper location
+        create = True
+        if os.path.lexists(canonical):
+            if not os.path.islink(canonical):
+                # os.readlink() would blow up with a raw OSError here,
+                # and we must never clobber a real directory or mount
+                raise ActivateError(
+                    '%s exists and is not a symlink' % canonical)
+            old = os.readlink(canonical)
+            if old != path:
+                log.debug('Removing old symlink %s -> %s', canonical, old)
+                try:
+                    os.unlink(canonical)
+                except OSError as e:
+                    raise ActivateError(
+                        'unable to remove old symlink %s: %s' % (canonical, e))
+            else:
+                create = False
+        if create:
+            log.debug('Creating symlink %s -> %s', canonical, path)
+            try:
+                os.symlink(path, canonical)
+            except OSError as e:
+                raise ActivateError(
+                    'unable to create symlink %s -> %s: %s' % (canonical, path, e))
+
+    return (cluster, osd_id)
+
+
+def activate(
+ path,
+ activate_key_template,
+ init,
+ ):
try:
check_osd_magic(path)
@@ -474,11 +598,33 @@ def activate(
keyring=keyring,
)
- # indicate this daemon is managed by upstart
- if not os.path.exists(os.path.join(path, 'upstart')):
- with file(os.path.join(path, 'upstart'), 'w'):
+ if init is not None:
+ if init == 'auto':
+ c = get_conf(
+ cluster=cluster,
+ variable='init'
+ )
+ if c is not None:
+ init = c
+ else:
+ (distro, release, codename) = platform.dist()
+ if distro == 'Ubuntu':
+ init = 'upstart'
+ else:
+ init = 'sysvinit'
+
+ log.debug('Marking with init system %s', init)
+ with file(os.path.join(path, init), 'w'):
pass
+ # remove markers for others, just in case.
+ for other in init_systems:
+ if other != init:
+ try:
+ os.unlink(os.path.join(path, other))
+ except:
+ pass
+
if not os.path.exists(os.path.join(path, 'active')):
log.debug('Authorizing OSD key...')
auth_key(
@@ -488,39 +634,10 @@ def activate(
keyring=keyring,
)
write_one_line(path, 'active', 'ok')
-
- # check if the disk is already active
- active = False
- src_dev = os.stat(path).st_dev
- try:
- dst_dev = os.stat('/var/lib/ceph/osd/{cluster}-{osd_id}'.format(
- cluster=cluster,
- osd_id=osd_id)).st_dev
- if src_dev == dst_dev:
- active = True
- except:
- pass
- if active:
- log.debug('OSD already mounted')
- unmount(path)
- else:
- move_mount(
- path=path,
- cluster=cluster,
- osd_id=osd_id,
- )
+ log.debug('%s osd.%s data dir is ready at %s', cluster, osd_id, path)
+ return (osd_id, cluster)
except:
- unmount(path)
- finally:
- if do_mount:
- # if we created a temp dir to mount it, remove it
- os.rmdir(path)
-
- upstart_start(
- cluster=cluster,
- osd_id=osd_id,
- )
-
+ raise
def parse_args():
parser = argparse.ArgumentParser(
@@ -534,7 +651,7 @@ def parse_args():
parser.add_argument(
'--mount',
action='store_true', default=None,
- help='mount the device first',
+ help='mount a block device; path must follow',
)
parser.add_argument(
'--activate-key',
@@ -545,7 +662,15 @@ def parse_args():
parser.add_argument(
'path',
metavar='PATH',
- help='path to OSD data directory, or block device if using --mount',
+ nargs='?',
+ help='path to block device or directory',
+ )
+ parser.add_argument(
+ '--mark-init',
+ metavar='INITSYSTEM',
+ help='init system to manage this dir',
+ default='auto',
+ choices=init_systems,
)
parser.set_defaults(
activate_key_template='/var/lib/ceph/bootstrap-osd/{cluster}.keyring',
@@ -568,11 +693,33 @@ def main():
)
try:
- activate(
- path=args.path,
- activate_key_template=args.activate_key_template,
- do_mount=args.mount,
+ cluster = None
+ osd_id = None
+
+ if not os.path.exists(args.path):
+ raise ActivateError('%s does not exist', args.path)
+
+ mode = os.stat(args.path).st_mode
+ if stat.S_ISBLK(mode):
+ (cluster, osd_id) = mount_activate(
+ dev=args.path,
+ activate_key_template=args.activate_key_template,
+ init=args.mark_init,
+ )
+ elif stat.S_ISDIR(mode):
+ (cluster, osd_id) = activate_dir(
+ path=args.path,
+ activate_key_template=args.activate_key_template,
+ init=args.mark_init,
+ )
+ else:
+ raise ActivateError('%s is not a directory or block device', args.path)
+
+ start_daemon(
+ cluster=cluster,
+ osd_id=osd_id,
)
+
except ActivateError as e:
print >>sys.stderr, '{prog}: {msg}'.format(
prog=args.prog,
diff --git a/src/ceph-disk-prepare b/src/ceph-disk-prepare
index e5c4bdb9050..b0f003b6e5c 100755
--- a/src/ceph-disk-prepare
+++ b/src/ceph-disk-prepare
@@ -5,10 +5,43 @@ import logging
import os
import os.path
import subprocess
+import stat
import sys
import tempfile
import uuid
+CEPH_OSD_ONDISK_MAGIC = 'ceph osd volume v026'
+
+JOURNAL_UUID = '45b0969e-9b03-4f30-b4c6-b4b80ceff106'
+DMCRYPT_JOURNAL_UUID = '45b0969e-9b03-4f30-b4c6-5ec00ceff106'
+OSD_UUID = '4fbd7e29-9d25-41b8-afd0-062c0ceff05d'
+DMCRYPT_OSD_UUID = '4fbd7e29-9d25-41b8-afd0-5ec00ceff05d'
+TOBE_UUID = '89c57f98-2fe5-4dc0-89c1-f3ad0ceff2be'
+DMCRYPT_TOBE_UUID = '89c57f98-2fe5-4dc0-89c1-5ec00ceff2be'
+
+DEFAULT_FS_TYPE = 'xfs'
+
+MOUNT_OPTIONS = dict(
+ btrfs='noatime,user_subvol_rm_allowed',
+ ext4='noatime,user_xattr',
+ xfs='noatime',
+ )
+
+MKFS_ARGS = dict(
+ btrfs=[
+ '-m', 'single',
+ '-l', '32768',
+ '-n', '32768',
+ ],
+ xfs=[
+ # xfs insists on not overwriting previous fs; even if we wipe
+ # partition table, we often recreate it exactly the same way,
+ # so we'll see ghosts of filesystems past
+ '-f',
+ '-i', 'size=2048',
+ ],
+ )
+
log_name = __name__
if log_name == '__main__':
@@ -38,6 +71,28 @@ class UnmountError(PrepareError):
"""
+def is_partition(dev):
+    """
+    Check whether a given device is a partition or a full disk.
+
+    Symlinks are resolved first.  Raises PrepareError if the resolved
+    path is not a block device.  Returns True if the resolved device
+    name ends in a decimal digit (e.g., /dev/sda3), False otherwise.
+    """
+    # resolve symlink(s); os.readlink() returns the link target
+    # verbatim, which may be relative to the link's own directory, so
+    # let realpath() do the walking instead of following it by hand
+    dev = os.path.realpath(dev)
+    if not stat.S_ISBLK(os.lstat(dev).st_mode):
+        raise PrepareError('not a block device', dev)
+
+    # if the device ends in a number, it is a partition (e.g., /dev/sda3)
+    # NOTE(review): a whole-disk node whose name ends in a digit would
+    # be misdetected by this heuristic -- confirm that is acceptable
+    return dev[-1:].isdigit()
+
+
def write_one_line(parent, name, text):
"""
Write a file whose sole contents are a single line.
@@ -52,11 +107,6 @@ def write_one_line(parent, name, text):
os.rename(tmp, path)
-CEPH_OSD_ONDISK_MAGIC = 'ceph osd volume v026'
-
-JOURNAL_UUID = '45b0969e-9b03-4f30-b4c6-b4b80ceff106'
-
-
# TODO depend on python2.7
def _check_output(*args, **kwargs):
process = subprocess.Popen(
@@ -140,28 +190,66 @@ def get_fsid(cluster):
return fsid
-DEFAULT_FS_TYPE = 'xfs'
+def get_or_create_dmcrypt_key(
+ uuid,
+ key_dir,
+ ):
+ path = os.path.join(key_dir, uuid)
-MOUNT_OPTIONS = dict(
- btrfs='noatime,user_subvol_rm_allowed',
- ext4='noatime,user_xattr',
- xfs='noatime',
- )
+ # already have it?
+ if os.path.exists(path):
+ return path
+
+ # make a new key
+ try:
+ if not os.path.exists(key_dir):
+ os.makedirs(key_dir)
+ with file('/dev/urandom', 'rb') as i:
+ key = i.read(256)
+ with file(path, 'wb') as f:
+ f.write(key)
+ return path
+ except:
+ raise PrepareError('unable to read or create dm-crypt key', path)
+
+
+def dmcrypt_map(
+    rawdev,
+    keypath,
+    uuid,
+    ):
+    """
+    Create a dm-crypt mapping named ``uuid`` on top of ``rawdev``,
+    keyed by the file at ``keypath``.
+
+    Returns the path of the mapped device under /dev/mapper.  Raises
+    PrepareError if cryptsetup exits with a failure status.
+    """
+    mapped = '/dev/mapper/'+ uuid
+    command = ['cryptsetup', '--key-file', keypath, '--key-size', '256']
+    command.extend(['create', uuid, rawdev])
+    try:
+        subprocess.check_call(command)
+    except subprocess.CalledProcessError:
+        raise PrepareError('unable to map device', rawdev)
+    return mapped
+
-MKFS_ARGS = dict(
- btrfs=[
- '-m', 'single',
- '-l', '32768',
- '-n', '32768',
- ],
- xfs=[
- # xfs insists on not overwriting previous fs; even if we wipe
- # partition table, we often recreate it exactly the same way,
- # so we'll see ghosts of filesystems past
- '-f',
- '-i', 'size=2048',
- ],
- )
+
+def dmcrypt_unmap(
+    uuid
+    ):
+    """
+    Tear down the dm-crypt mapping named ``uuid``.
+
+    Raises PrepareError if cryptsetup exits with a failure status.
+    """
+    command = ['cryptsetup', 'remove', uuid]
+    try:
+        subprocess.check_call(command)
+    except subprocess.CalledProcessError:
+        raise PrepareError('unable to unmap device', uuid)
+
def mount(
@@ -179,6 +267,7 @@ def mount(
dir='/var/lib/ceph/tmp',
)
try:
+ log.debug('Mounting %s on %s with options %s', dev, path, options)
subprocess.check_call(
args=[
'mount',
@@ -202,6 +291,7 @@ def unmount(
path,
):
try:
+ log.debug('Unmounting %s', path)
subprocess.check_call(
args=[
'umount',
@@ -254,27 +344,21 @@ def get_free_partition_index(dev):
return num
-def prepare(
- disk,
- journal,
- journal_size,
- fstype,
- mkfs_args,
- mount_options,
- cluster_uuid,
- ):
+def zap(dev):
"""
- Prepare a disk to be used as an OSD data disk.
-
- The ``magic`` file is written last, so it's presence is a reliable
- indicator of the whole sequence having completed.
-
- WARNING: This will unconditionally overwrite anything given to
- it.
+ Destroy the partition table and content of a given disk.
"""
-
try:
- # this kills the crab
+ log.debug('Zapping partition table on %s', dev)
+
+ # try to wipe out any GPT partition table backups. sgdisk
+ # isn't too thorough.
+ lba_size = 4096
+ size = 33 * lba_size
+ with file(dev, 'wb') as f:
+ f.seek(-size, os.SEEK_END)
+ f.write(size*'\0')
+
subprocess.check_call(
args=[
'sgdisk',
@@ -282,145 +366,339 @@ def prepare(
'--clear',
'--mbrtogpt',
'--',
- disk,
+ dev,
+ ],
+ )
+ except subprocess.CalledProcessError as e:
+ raise PrepareError(e)
+
+
+def prepare_journal_dev(
+ data,
+ journal,
+ journal_size,
+ journal_uuid,
+ journal_dm_keypath,
+ ):
+
+ if is_partition(journal):
+ log.debug('Journal %s is a partition', journal)
+ log.warning('OSD will not be hot-swappable if journal is not the same device as the osd data')
+ return (journal, None, None)
+
+ key = None
+ ptype = JOURNAL_UUID
+ if journal_dm_keypath:
+ ptype = DMCRYPT_JOURNAL_UUID
+
+ # it is a whole disk. create a partition!
+ num = None
+ if journal == data:
+ # we're sharing the disk between osd data and journal;
+ # make journal be partition number 2, so it's pretty; put
+ # journal at end of free space so partitioning tools don't
+ # reorder them suddenly
+ num = 2
+ journal_part = '{num}:-{size}M:0'.format(
+ num=num,
+ size=journal_size,
+ )
+ else:
+ # sgdisk has no way for me to say "whatever is the next
+ # free index number" when setting type guids etc, so we
+ # need to awkwardly look up the next free number, and then
+ # fix that in the call -- and hope nobody races with us;
+ # then again nothing guards the partition table from races
+ # anyway
+ num = get_free_partition_index(dev=journal)
+ journal_part = '{num}:0:+{size}M'.format(
+ num=num,
+ size=journal_size,
+ )
+ log.warning('OSD will not be hot-swappable if journal is not the same device as the osd data')
+
+ try:
+ log.debug('Creating journal partition num %d size %d on %s', num, journal_size, journal)
+ subprocess.check_call(
+ args=[
+ 'sgdisk',
+ '--new={part}'.format(part=journal_part),
+ '--change-name={num}:ceph journal'.format(num=num),
+ '--partition-guid={num}:{journal_uuid}'.format(
+ num=num,
+ journal_uuid=journal_uuid,
+ ),
+ '--typecode={num}:{uuid}'.format(
+ num=num,
+ uuid=ptype,
+ ),
+ '--',
+ journal,
+ ],
+ )
+ subprocess.check_call(
+ args=[
+ # also make sure the kernel refreshes the new table
+ 'partprobe',
+ journal,
],
)
+
+ journal_symlink = '/dev/disk/by-partuuid/{journal_uuid}'.format(
+ journal_uuid=journal_uuid,
+ )
+
+ journal_dmcrypt = None
+ if journal_dm_keypath:
+ journal_dmcrypt = journal_symlink
+ journal_symlink = '/dev/mapper/{uuid}'.format(uuid=journal_uuid)
+
+ log.debug('Journal is GPT partition %s', journal_symlink)
+ return (journal_symlink, journal_dmcrypt, journal_uuid)
+
except subprocess.CalledProcessError as e:
raise PrepareError(e)
- osd_uuid = str(uuid.uuid4())
- # store the partition uuid iff using external journal
- journal_uuid = None
+def prepare_journal_file(
+    journal,
+    journal_size):
+    """
+    Use a regular file as the OSD journal.
+
+    If ``journal`` does not exist yet it is created and sized to
+    ``journal_size`` megabytes; an existing file is left as it is.
+
+    Returns (journal, None, None).
+    """
+    already_there = os.path.exists(journal)
+    if not already_there:
+        nbytes = journal_size * 1048576
+        log.debug('Creating journal file %s with size %dM', journal, journal_size)
+        with file(journal, 'wb') as f:
+            f.truncate(nbytes)
+
+    # FIXME: should we resize an existing journal file?
+
+    log.debug('Journal is file %s', journal)
+    log.warning('OSD will not be hot-swappable if journal is not the same device as the osd data')
+    return (journal, None, None)
+
+
+def prepare_journal(
+    data,
+    journal,
+    journal_size,
+    journal_uuid,
+    force_file,
+    force_dev,
+    journal_dm_keypath,
+    ):
+    """
+    Set up the journal for an OSD, dispatching on what ``journal`` is.
+
+    - None: there is no external journal; returns (None, None, None)
+    - a path that does not exist, or a regular file: handed to
+      prepare_journal_file()
+    - a block device: handed to prepare_journal_dev()
+
+    ``force_dev`` makes anything other than a block device an error;
+    ``force_file`` makes a block device an error.
+
+    Returns the 3-tuple produced by the selected helper.  Raises
+    PrepareError when a force_* constraint is violated or when
+    ``journal`` is neither a block device nor a regular file.
+    """
+
+    if journal is None:
+        if force_dev:
+            raise PrepareError('Journal is unspecified; not a block device')
+        return (None, None, None)
+
+    if not os.path.exists(journal):
+        if force_dev:
+            raise PrepareError('Journal does not exist; not a block device', journal)
+        return prepare_journal_file(journal, journal_size)
+
+    jmode = os.stat(journal).st_mode
+    if stat.S_ISREG(jmode):
+        if force_dev:
+            raise PrepareError('Journal is not a block device', journal)
+        return prepare_journal_file(journal, journal_size)
+
+    if stat.S_ISBLK(jmode):
+        if force_file:
+            raise PrepareError('Journal is not a regular file', journal)
+        return prepare_journal_dev(data, journal, journal_size, journal_uuid, journal_dm_keypath)
+
+    # interpolate the path ourselves; exception arguments are not
+    # %-formatted the way logging arguments are
+    raise PrepareError('Journal %s is neither a block device nor regular file' % journal)
+
+
+def adjust_symlink(target, path):
+    """
+    Make ``path`` a symlink pointing at ``target``.
+
+    An existing regular file at ``path`` is removed, an existing
+    symlink with a different target is replaced, and a symlink that
+    already points at ``target`` is left untouched.
+
+    Raises PrepareError if the old entry cannot be inspected or
+    removed, or if the new symlink cannot be created.
+    """
+    create = True
+    if os.path.lexists(path):
+        try:
+            # lstat, not stat: we care about the link itself
+            mode = os.lstat(path).st_mode
+            if stat.S_ISREG(mode):
+                log.debug('Removing old file %s', path)
+                os.unlink(path)
+            elif stat.S_ISLNK(mode):
+                old = os.readlink(path)
+                if old != target:
+                    log.debug('Removing old symlink %s -> %s', path, old)
+                    os.unlink(path)
+                else:
+                    create = False
+        except OSError:
+            raise PrepareError('unable to remove (or adjust) old file (symlink)', path)
+    if create:
+        log.debug('Creating symlink %s -> %s', path, target)
+        try:
+            os.symlink(target, path)
+        except OSError:
+            raise PrepareError('unable to create symlink %s -> %s' % (path, target))
+
+def prepare_dir(
+ path,
+ journal,
+ cluster_uuid,
+ osd_uuid,
+ journal_uuid,
+ journal_dmcrypt = None,
+ ):
+ log.debug('Preparing osd data dir %s', path)
+
+ if osd_uuid is None:
+ osd_uuid = str(uuid.uuid4())
if journal is not None:
- journal_uuid = str(uuid.uuid4())
-
- if journal == disk:
- # we're sharing the disk between osd data and journal;
- # make journal be partition number 2, so it's pretty; put
- # journal at end of free space so partitioning tools don't
- # reorder them suddenly
- num = 2
- journal_part = '{num}:-{size}M:0'.format(
- num=num,
- size=journal_size,
- )
- else:
- # sgdisk has no way for me to say "whatever is the next
- # free index number" when setting type guids etc, so we
- # need to awkwardly look up the next free number, and then
- # fix that in the call -- and hope nobody races with us;
- # then again nothing guards the partition table from races
- # anyway
- num = get_free_partition_index(dev=journal)
- journal_part = '{num}:0:+{size}M'.format(
- num=num,
- size=journal_size,
- )
+ # we're using an external journal; point to it here
+ adjust_symlink(journal, os.path.join(path, 'journal'))
+ if journal_dmcrypt is not None:
+ adjust_symlink(journal_dmcrypt, os.path.join(path, 'journal_dmcrypt'))
+ else:
+ try:
+ os.unlink(os.path.join(path, 'journal_dmcrypt'))
+ except:
+ pass
+
+ write_one_line(path, 'ceph_fsid', cluster_uuid)
+ write_one_line(path, 'fsid', osd_uuid)
+ write_one_line(path, 'magic', CEPH_OSD_ONDISK_MAGIC)
+
+ if journal_uuid is not None:
+ # i.e., journal is a tagged partition
+ write_one_line(path, 'journal_uuid', journal_uuid)
+
+def prepare_dev(
+ data,
+ journal,
+ fstype,
+ mkfs_args,
+ mount_options,
+ cluster_uuid,
+ osd_uuid,
+ journal_uuid,
+ journal_dmcrypt,
+ osd_dm_keypath,
+ ):
+ """
+ Prepare a data/journal combination to be used for an OSD.
+
+ The ``magic`` file is written last, so it's presence is a reliable
+ indicator of the whole sequence having completed.
+
+ WARNING: This will unconditionally overwrite anything given to
+ it.
+ """
+
+ ptype_tobe = TOBE_UUID
+ ptype_osd = OSD_UUID
+ if osd_dm_keypath:
+ ptype_tobe = DMCRYPT_TOBE_UUID
+ ptype_osd = DMCRYPT_OSD_UUID
+
+ rawdev = None
+ if is_partition(data):
+ log.debug('OSD data device %s is a partition', data)
+ rawdev = data
+ else:
+ log.debug('Creating osd partition on %s', data)
try:
subprocess.check_call(
args=[
'sgdisk',
- '--new={part}'.format(part=journal_part),
- '--change-name={num}:ceph journal'.format(num=num),
- '--partition-guid={num}:{journal_uuid}'.format(
- num=num,
- journal_uuid=journal_uuid,
- ),
- '--typecode={num}:{uuid}'.format(
- num=num,
- uuid=JOURNAL_UUID,
+ '--largest-new=1',
+ '--change-name=1:ceph data',
+ '--partition-guid=1:{osd_uuid}'.format(
+ osd_uuid=osd_uuid,
),
+ '--typecode=1:%s' % ptype_tobe,
'--',
- journal,
+ data,
],
)
subprocess.check_call(
args=[
# also make sure the kernel refreshes the new table
'partprobe',
- journal,
+ data,
],
)
except subprocess.CalledProcessError as e:
raise PrepareError(e)
+ rawdev = '{data}1'.format(data=data)
+
+ dev = None
+ if osd_dm_keypath:
+ dev = dmcrypt_map(rawdev, osd_dm_keypath, osd_uuid)
+ else:
+ dev = rawdev
+
try:
- subprocess.check_call(
- args=[
- 'sgdisk',
- '--largest-new=1',
- '--change-name=1:ceph data',
- '--partition-guid=1:{osd_uuid}'.format(
- osd_uuid=osd_uuid,
- ),
- '--typecode=1:89c57f98-2fe5-4dc0-89c1-f3ad0ceff2be',
+ args = [
+ 'mkfs',
+ '--type={fstype}'.format(fstype=fstype),
+ ]
+ if mkfs_args is not None:
+ args.extend(mkfs_args.split())
+ if fstype == 'xfs':
+ args.extend(['-f']) # always force
+ else:
+ args.extend(MKFS_ARGS.get(fstype, []))
+ args.extend([
'--',
- disk,
- ],
- )
- subprocess.check_call(
- args=[
- # also make sure the kernel refreshes the new table
- 'partprobe',
- disk,
- ],
- )
- except subprocess.CalledProcessError as e:
- raise PrepareError(e)
+ dev,
+ ])
+ try:
+ log.debug('Creating %s fs on %s', fstype, dev)
+ subprocess.check_call(args=args)
+ except subprocess.CalledProcessError as e:
+ raise PrepareError(e)
- dev = '{disk}1'.format(disk=disk)
- args = [
- 'mkfs',
- '--type={fstype}'.format(fstype=fstype),
- ]
- args.extend(MKFS_ARGS.get(fstype, []))
- if mkfs_args is not None:
- args.extend(mkfs_args.split())
- args.extend
- args.extend([
- '--',
- dev,
- ])
- try:
- subprocess.check_call(args=args)
- except subprocess.CalledProcessError as e:
- raise PrepareError(e)
+ #remove whitespaces from mount_options
+ if mount_options is not None:
+ mount_options = "".join(mount_options.split())
- path = mount(dev=dev, fstype=fstype, options=mount_options)
- try:
- if journal_uuid is not None:
- # we're using an external journal; point to it here
- os.symlink(
- '/dev/disk/by-partuuid/{journal_uuid}'.format(
- journal_uuid=journal_uuid,
- ),
- os.path.join(path, 'journal'),
+ path = mount(dev=dev, fstype=fstype, options=mount_options)
+
+ try:
+ prepare_dir(
+ path=path,
+ journal=journal,
+ cluster_uuid=cluster_uuid,
+ osd_uuid=osd_uuid,
+ journal_uuid=journal_uuid,
+ journal_dmcrypt=journal_dmcrypt,
)
- write_one_line(path, 'ceph_fsid', cluster_uuid)
- write_one_line(path, 'fsid', osd_uuid)
- write_one_line(path, 'magic', CEPH_OSD_ONDISK_MAGIC)
+ finally:
+ unmount(path)
finally:
- unmount(path)
+ if rawdev != dev:
+ dmcrypt_unmap(osd_uuid)
- try:
- subprocess.check_call(
- args=[
- 'sgdisk',
- '--typecode=1:4fbd7e29-9d25-41b8-afd0-062c0ceff05d',
- '--',
- disk,
- ],
- )
- except subprocess.CalledProcessError as e:
- raise PrepareError(e)
+ if not is_partition(data):
+ try:
+ subprocess.check_call(
+ args=[
+ 'sgdisk',
+ '--typecode=1:%s' % ptype_osd,
+ '--',
+ data,
+ ],
+ )
+ subprocess.check_call(
+ args=[
+ # also make sure the kernel refreshes the new table
+ 'partprobe',
+ data,
+ ],
+ )
+ except subprocess.CalledProcessError as e:
+ raise PrepareError(e)
def parse_args():
parser = argparse.ArgumentParser(
- description='Prepare a disk for a Ceph OSD',
+ description='Prepare a directory for a Ceph OSD',
)
parser.add_argument(
'-v', '--verbose',
@@ -438,13 +716,59 @@ def parse_args():
help='cluster uuid to assign this disk to',
)
parser.add_argument(
+ '--osd-uuid',
+ metavar='UUID',
+ help='unique OSD uuid to assign this disk to',
+ )
+ parser.add_argument(
+ '--journal-uuid',
+ metavar='UUID',
+ help='unique uuid to assign to the journal',
+ )
+ parser.add_argument(
'--fs-type',
help='file system type to use (e.g. "ext4")',
)
parser.add_argument(
- 'disk',
- metavar='DISK',
- help='path to OSD data disk block device',
+ '--zap-disk',
+ action='store_true', default=None,
+ help='destroy the partition table (and content) of a disk',
+ )
+ parser.add_argument(
+ '--data-dir',
+ action='store_true', default=None,
+ help='verify that DATA is a dir',
+ )
+ parser.add_argument(
+ '--data-dev',
+ action='store_true', default=None,
+ help='verify that DATA is a block device',
+ )
+ parser.add_argument(
+ '--journal-file',
+ action='store_true', default=None,
+ help='verify that JOURNAL is a file',
+ )
+ parser.add_argument(
+ '--journal-dev',
+ action='store_true', default=None,
+ help='verify that JOURNAL is a block device',
+ )
+ parser.add_argument(
+ '--dmcrypt',
+ action='store_true', default=None,
+ help='encrypt DATA and/or JOURNAL devices with dm-crypt',
+ )
+ parser.add_argument(
+ '--dmcrypt-key-dir',
+ metavar='KEYDIR',
+ default='/etc/ceph/dmcrypt-keys',
+ help='directory where dm-crypt keys are stored',
+ )
+ parser.add_argument(
+ 'data',
+ metavar='DATA',
+ help='path to OSD data (a disk block device or directory)',
)
parser.add_argument(
'journal',
@@ -473,7 +797,23 @@ def main():
level=loglevel,
)
+ journal_dm_keypath = None
+ osd_dm_keypath = None
+
try:
+ if not os.path.exists(args.data):
+ raise PrepareError('data path does not exist', args.data)
+
+ # FIXME: verify disk/partitions is not in use
+ if args.zap_disk is not None:
+ if not os.path.exists(args.data):
+ raise PrepareError('does not exist', args.data)
+ mode = os.stat(args.data).st_mode
+ if stat.S_ISBLK(mode) and not is_partition(args.data):
+ zap(args.data)
+ else:
+ raise PrepareError('not full block device; cannot zap', args.data)
+
if args.cluster_uuid is None:
args.cluster_uuid = get_fsid(cluster=args.cluster)
if args.cluster_uuid is None:
@@ -484,24 +824,43 @@ def main():
if args.fs_type is None:
args.fs_type = get_conf(
cluster=args.cluster,
- variable='osd_fs_type',
+ variable='osd_mkfs_type',
)
if args.fs_type is None:
+ args.fs_type = get_conf(
+ cluster=args.cluster,
+ variable='osd_fs_type',
+ )
+ if args.fs_type is None:
args.fs_type = DEFAULT_FS_TYPE
mkfs_args = get_conf(
cluster=args.cluster,
- variable='osd_fs_mkfs_arguments_{fstype}'.format(
+ variable='osd_mkfs_options_{fstype}'.format(
fstype=args.fs_type,
),
)
+ if mkfs_args is None:
+ mkfs_args = get_conf(
+ cluster=args.cluster,
+ variable='osd_fs_mkfs_options_{fstype}'.format(
+ fstype=args.fs_type,
+ ),
+ )
mount_options = get_conf(
cluster=args.cluster,
- variable='osd_fs_mount_options_{fstype}'.format(
+ variable='osd_mount_options_{fstype}'.format(
fstype=args.fs_type,
),
)
+ if mount_options is None:
+ mount_options = get_conf(
+ cluster=args.cluster,
+ variable='osd_fs_mount_options_{fstype}'.format(
+ fstype=args.fs_type,
+ ),
+ )
journal_size = get_conf_with_default(
cluster=args.cluster,
@@ -509,16 +868,68 @@ def main():
)
journal_size = int(journal_size)
- prepare(
- disk=args.disk,
+ # colocate journal with data?
+ dmode = os.stat(args.data).st_mode
+ if stat.S_ISBLK(dmode) and not is_partition(args.data) and args.journal is None and args.journal_file is None:
+ log.info('Will colocate journal with data on %s', args.data)
+ args.journal = args.data
+
+ if args.journal_uuid is None:
+ args.journal_uuid = str(uuid.uuid4())
+ if args.osd_uuid is None:
+ args.osd_uuid = str(uuid.uuid4())
+
+ # dm-crypt keys?
+ if args.dmcrypt:
+ journal_dm_keypath = get_or_create_dmcrypt_key(args.journal_uuid, args.dmcrypt_key_dir)
+ osd_dm_keypath = get_or_create_dmcrypt_key(args.osd_uuid, args.dmcrypt_key_dir)
+
+ # prepare journal
+ (journal_symlink, journal_dmcrypt, journal_uuid) = prepare_journal(
+ data=args.data,
journal=args.journal,
journal_size=journal_size,
- fstype=args.fs_type,
- mkfs_args=mkfs_args,
- mount_options=mount_options,
- cluster_uuid=args.cluster_uuid,
+ journal_uuid=args.journal_uuid,
+ force_file=args.journal_file,
+ force_dev=args.journal_dev,
+ journal_dm_keypath=journal_dm_keypath,
)
+
+ # prepare data
+ if stat.S_ISDIR(dmode):
+ if args.data_dev:
+ raise PrepareError('data path is not a block device', args.data)
+ prepare_dir(
+ path=args.data,
+ journal=journal_symlink,
+ cluster_uuid=args.cluster_uuid,
+ osd_uuid=args.osd_uuid,
+ journal_uuid=journal_uuid,
+ journal_dmcrypt=journal_dmcrypt,
+ )
+ elif stat.S_ISBLK(dmode):
+ if args.data_dir:
+ raise PrepareError('data path is not a directory', args.data)
+ prepare_dev(
+ data=args.data,
+ journal=journal_symlink,
+ fstype=args.fs_type,
+ mkfs_args=mkfs_args,
+ mount_options=mount_options,
+ cluster_uuid=args.cluster_uuid,
+ osd_uuid=args.osd_uuid,
+ journal_uuid=journal_uuid,
+ journal_dmcrypt=journal_dmcrypt,
+ osd_dm_keypath=osd_dm_keypath,
+ )
+ else:
+ raise PrepareError('not a dir or block device', args.data)
+
except PrepareError as e:
+ if journal_dm_keypath:
+ os.unlink(journal_dm_keypath)
+ if osd_dm_keypath:
+ os.unlink(osd_dm_keypath)
print >>sys.stderr, '{prog}: {msg}'.format(
prog=args.prog,
msg=e,
diff --git a/src/ceph_authtool.cc b/src/ceph_authtool.cc
index c0a06ca1e53..3075d9c69a7 100644
--- a/src/ceph_authtool.cc
+++ b/src/ceph_authtool.cc
@@ -12,8 +12,6 @@
*
*/
-using namespace std;
-
#include "common/config.h"
#include "common/strtol.h"
@@ -123,7 +121,7 @@ int main(int argc, const char **argv)
!add_key.empty() ||
list ||
!caps_fn.empty() ||
- caps.size() ||
+ !caps.empty() ||
set_auid ||
print_key ||
create_keyring ||
@@ -236,7 +234,7 @@ int main(int argc, const char **argv)
keyring.set_caps(ename, caps);
modified = true;
}
- if (caps.size()) {
+ if (!caps.empty()) {
keyring.set_caps(ename, caps);
modified = true;
}
diff --git a/src/ceph_common.sh b/src/ceph_common.sh
index b66b1de3a53..47a21af85bd 100644
--- a/src/ceph_common.sh
+++ b/src/ceph_common.sh
@@ -45,6 +45,13 @@ check_host() {
#echo host for $name is $host, i am $hostname
+ # sysvinit managed instance in standard location?
+ if [ -e "/var/lib/ceph/$type/ceph-$id/sysvinit" ]; then
+ host="$hostname"
+ echo "=== $type.$id === "
+ return 0
+ fi
+
# ignore all sections without 'host' defined
if [ -z "$host" ]; then
return 1
@@ -82,8 +89,8 @@ do_cmd() {
sudo su $user -c "$1" || { [ -z "$3" ] && echo "failed: '$1'" && exit 1; }
fi
else
- [ $verbose -eq 1 ] && echo "--- $ssh $2 \"cd $sshdir ; ulimit -c unlimited ; $1\""
- $ssh $2 "cd $sshdir ; ulimit -c unlimited ; $1" || { [ -z "$3" ] && echo "failed: '$ssh $1'" && exit 1; }
+ [ $verbose -eq 1 ] && echo "--- $ssh $2 \"if [ ! -d $sshdir ]; then mkdir -p $sshdir; fi; cd $sshdir ; ulimit -c unlimited ; $1\""
+ $ssh $2 "if [ ! -d $sshdir ]; then mkdir -p $sshdir; fi; cd $sshdir ; ulimit -c unlimited ; $1" || { [ -z "$3" ] && echo "failed: '$ssh $1'" && exit 1; }
fi
}
@@ -98,19 +105,54 @@ do_root_cmd() {
sudo bash -c "$1" || { echo "failed: '$1'" ; exit 1; }
fi
else
- [ $verbose -eq 1 ] && echo "--- $rootssh $2 \"cd $sshdir ; ulimit -c unlimited ; $1\""
- $rootssh $2 "cd $sshdir ; ulimit -c unlimited ; $1" || { echo "failed: '$rootssh $1'" ; exit 1; }
+ [ $verbose -eq 1 ] && echo "--- $rootssh $2 \"if [ ! -d $sshdir ]; then mkdir -p $sshdir; fi ; cd $sshdir ; ulimit -c unlimited ; $1\""
+ $rootssh $2 "if [ ! -d $sshdir ]; then mkdir -p $sshdir; fi ; cd $sshdir; ulimit -c unlimited ; $1" || { echo "failed: '$rootssh $1'" ; exit 1; }
+ fi
+}
+
+get_local_daemon_list() {
+ type=$1
+ if [ -d "/var/lib/ceph/$type" ]; then
+ for i in `find /var/lib/ceph/$type -mindepth 1 -maxdepth 1 -type d -printf '%f\n'`; do
+ if [ -e "/var/lib/ceph/$type/$i/sysvinit" ]; then
+ id=`echo $i | sed 's/.*-//'`
+ local="$local $type.$id"
+ fi
+ done
+ fi
+}
+
+get_local_name_list() {
+ orig=$1
+ local=""
+
+ if [ -z "$orig" ]; then
+ # enumerate local directories
+ get_local_daemon_list "mon"
+ get_local_daemon_list "osd"
+ get_local_daemon_list "mds"
+ return
fi
+
+ for f in $orig; do
+ type=`echo $f | cut -c 1-3` # e.g. 'mon', if $item is 'mon1'
+ id=`echo $f | cut -c 4- | sed 's/\\.//'`
+ get_local_daemon_list $type
+
+ # FIXME
+ done
}
get_name_list() {
orig=$1
+ # extract list of monitors, mdss, osds defined in startup.conf
+ allconf=`$CCONF -c $conf -l mon | egrep -v '^mon$' ; \
+ $CCONF -c $conf -l mds | egrep -v '^mds$' ; \
+ $CCONF -c $conf -l osd | egrep -v '^osd$'`
+
if [ -z "$orig" ]; then
- # extract list of monitors, mdss, osds defined in startup.conf
- what=`$CCONF -c $conf -l mon | egrep -v '^mon$' ; \
- $CCONF -c $conf -l mds | egrep -v '^mds$' ; \
- $CCONF -c $conf -l osd | egrep -v '^osd$'`
+ what="$allconf $local"
return
fi
@@ -118,17 +160,16 @@ get_name_list() {
for f in $orig; do
type=`echo $f | cut -c 1-3` # e.g. 'mon', if $item is 'mon1'
id=`echo $f | cut -c 4- | sed 's/\\.//'`
- all=`$CCONF -c $conf -l $type | egrep -v "^$type$" || true`
case $f in
mon | osd | mds)
- what="$what $all"
+ what=`echo $allconf $local | grep ^$type || true`
;;
*)
- if echo " " $all " " | egrep -v -q "( $type$id | $type.$id )"; then
- echo "$0: $type.$id not found ($conf defines \"$all\")"
+ if ! echo " " $allconf $local " " | egrep -q "( $type$id | $type.$id )"; then
+ echo "$0: $type.$id not found ($conf defines" $allconf", /var/lib/ceph defines" $local")"
exit 1
fi
- what="$what $f"
+ what="$f"
;;
esac
done
diff --git a/src/client/Client.cc b/src/client/Client.cc
index 6cff22be9f0..2afb88bf1fb 100644
--- a/src/client/Client.cc
+++ b/src/client/Client.cc
@@ -73,7 +73,7 @@ using namespace std;
#include "Inode.h"
#include "Dentry.h"
#include "Dir.h"
-#include "SnapRealm.h"
+#include "ClientSnapRealm.h"
#include "Fh.h"
#include "MetaSession.h"
#include "MetaRequest.h"
@@ -2566,16 +2566,20 @@ public:
}
};
-bool Client::_flush(Inode *in)
+bool Client::_flush(Inode *in, Context *onfinish)
{
ldout(cct, 10) << "_flush " << *in << dendl;
if (!in->oset.dirty_or_tx) {
ldout(cct, 10) << " nothing to flush" << dendl;
+ if (onfinish)
+ onfinish->complete(0);
return true;
}
- Context *onfinish = new C_Client_PutInode(this, in);
+ if (!onfinish) {
+ onfinish = new C_Client_PutInode(this, in);
+ }
bool safe = objectcacher->flush_set(&in->oset, onfinish);
if (safe) {
onfinish->complete(0);
@@ -3642,7 +3646,7 @@ void Client::unmount()
}
// wait for sessions to close
- while (mds_sessions.size()) {
+ while (!mds_sessions.empty()) {
ldout(cct, 2) << "waiting for " << mds_sessions.size() << " mds sessions to close" << dendl;
mount_cond.Wait(client_lock);
}
@@ -5877,11 +5881,19 @@ int Client::_fsync(Fh *f, bool syncdataonly)
Inode *in = f->inode;
tid_t wait_on_flush = 0;
bool flushed_metadata = false;
+ Mutex lock("Client::_fsync::lock");
+ Cond cond;
+ bool done = false;
+ C_SafeCond *object_cacher_completion = NULL;
ldout(cct, 3) << "_fsync(" << f << ", " << (syncdataonly ? "dataonly)":"data+metadata)") << dendl;
- if (cct->_conf->client_oc)
- _flush(in);
+ if (cct->_conf->client_oc) {
+ object_cacher_completion = new C_SafeCond(&lock, &cond, &done, &r);
+ in->get(); // take a reference; C_SafeCond doesn't and _flush won't either
+ _flush(in, object_cacher_completion);
+ ldout(cct, 15) << "using return-valued form of _fsync" << dendl;
+ }
if (!syncdataonly && (in->dirty_caps & ~CEPH_CAP_ANY_FILE_WR)) {
for (map<int, Cap*>::iterator iter = in->caps.begin(); iter != in->caps.end(); ++iter) {
@@ -5893,18 +5905,35 @@ int Client::_fsync(Fh *f, bool syncdataonly)
flushed_metadata = true;
} else ldout(cct, 10) << "no metadata needs to commit" << dendl;
- // FIXME: this can starve
- while (in->cap_refs[CEPH_CAP_FILE_BUFFER] > 0) {
- ldout(cct, 10) << "ino " << in->ino << " has " << in->cap_refs[CEPH_CAP_FILE_BUFFER]
- << " uncommitted, waiting" << dendl;
- wait_on_list(in->waitfor_commit);
+ if (object_cacher_completion) { // wait on a real reply instead of guessing
+ client_lock.Unlock();
+ lock.Lock();
+ ldout(cct, 15) << "waiting on data to flush" << dendl;
+ while (!done)
+ cond.Wait(lock);
+ lock.Unlock();
+ client_lock.Lock();
+ put_inode(in);
+ ldout(cct, 15) << "got " << r << " from flush writeback" << dendl;
+ } else {
+ // FIXME: this can starve
+ while (in->cap_refs[CEPH_CAP_FILE_BUFFER] > 0) {
+ ldout(cct, 10) << "ino " << in->ino << " has " << in->cap_refs[CEPH_CAP_FILE_BUFFER]
+ << " uncommitted, waiting" << dendl;
+ wait_on_list(in->waitfor_commit);
+ }
}
- if (!flushed_metadata) wait_sync_caps(wait_on_flush); //this could wait longer than strictly necessary,
- //but on a sync the user can put up with it
-
- ldout(cct, 10) << "ino " << in->ino << " has no uncommitted writes" << dendl;
+ if (!r) {
+ if (flushed_metadata) wait_sync_caps(wait_on_flush);
+ // this could wait longer than strictly necessary,
+ // but on a sync the user can put up with it
+ ldout(cct, 10) << "ino " << in->ino << " has no uncommitted writes" << dendl;
+ } else {
+ ldout(cct, 1) << "ino " << in->ino << " failed to commit to disk! "
+ << cpp_strerror(-r) << dendl;
+ }
return r;
}
@@ -7457,7 +7486,7 @@ int Client::get_file_stripe_address(int fd, loff_t offset, vector<entity_addr_t>
pg_t pg = osdmap->object_locator_to_pg(extents[0].oid, extents[0].oloc);
vector<int> osds;
osdmap->pg_to_acting_osds(pg, osds);
- if (!osds.size())
+ if (osds.empty())
return -EINVAL;
for (unsigned i = 0; i < osds.size(); i++) {
diff --git a/src/client/Client.h b/src/client/Client.h
index b3b1f87cf46..3fcdf481ad1 100644
--- a/src/client/Client.h
+++ b/src/client/Client.h
@@ -451,7 +451,19 @@ protected:
void _invalidate_inode_cache(Inode *in, int64_t off, int64_t len, bool keep_caps);
void _async_invalidate(Inode *in, int64_t off, int64_t len, bool keep_caps);
void _release(Inode *in);
- bool _flush(Inode *in);
+
+ /**
+ * Initiate a flush of the data associated with the given inode.
+ * If you specify a Context, you are responsible for holding an inode
+ * reference for the duration of the flush. If not, _flush() will
+ * take the reference for you.
+ * @param in The Inode whose data you wish to flush.
+ * @param c The Context you wish us to complete once the data is
+ * flushed. If already flushed, this will be called in-line.
+ *
+ * @returns true if the data was already flushed, false otherwise.
+ */
+ bool _flush(Inode *in, Context *c=NULL);
void _flush_range(Inode *in, int64_t off, uint64_t size);
void _flushed(Inode *in);
void flush_set_callback(ObjectCacher::ObjectSet *oset);
diff --git a/src/client/SnapRealm.cc b/src/client/ClientSnapRealm.cc
index 6a5918a0589..3656fbdf505 100644
--- a/src/client/SnapRealm.cc
+++ b/src/client/ClientSnapRealm.cc
@@ -1,7 +1,7 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
-#include "SnapRealm.h"
+#include "ClientSnapRealm.h"
#include "common/Formatter.h"
void SnapRealm::dump(Formatter *f) const
diff --git a/src/client/SnapRealm.h b/src/client/ClientSnapRealm.h
index 34d89568300..34d89568300 100644
--- a/src/client/SnapRealm.h
+++ b/src/client/ClientSnapRealm.h
diff --git a/src/client/Inode.cc b/src/client/Inode.cc
index 4b0c99d5764..60bc489b3c7 100644
--- a/src/client/Inode.cc
+++ b/src/client/Inode.cc
@@ -5,7 +5,7 @@
#include "Inode.h"
#include "Dentry.h"
#include "Dir.h"
-#include "SnapRealm.h"
+#include "ClientSnapRealm.h"
ostream& operator<<(ostream &out, Inode &in)
{
diff --git a/src/client/SyntheticClient.cc b/src/client/SyntheticClient.cc
index b2a936f55ac..45c9d43cec3 100644
--- a/src/client/SyntheticClient.cc
+++ b/src/client/SyntheticClient.cc
@@ -1977,7 +1977,10 @@ int SyntheticClient::write_file(string& fn, int size, loff_t wrsize) // size i
int fd = client->open(fn.c_str(), O_RDWR|O_CREAT);
dout(5) << "writing to " << fn << " fd " << fd << dendl;
- if (fd < 0) return fd;
+ if (fd < 0) {
+ delete[] buf;
+ return fd;
+ }
utime_t from = ceph_clock_now(g_ceph_context);
utime_t start = from;
@@ -2037,7 +2040,10 @@ int SyntheticClient::write_fd(int fd, int size, int wrsize) // size is in MB,
uint64_t chunks = (uint64_t)size * (uint64_t)(1024*1024) / (uint64_t)wrsize;
//dout(5) << "SyntheticClient::write_fd: writing to fd " << fd << dendl;
- if (fd < 0) return fd;
+ if (fd < 0) {
+ delete[] buf;
+ return fd;
+ }
for (unsigned i=0; i<chunks; i++) {
if (time_to_stop()) {
@@ -2087,7 +2093,10 @@ int SyntheticClient::read_file(const std::string& fn, int size,
int fd = client->open(fn.c_str(), O_RDONLY);
dout(5) << "reading from " << fn << " fd " << fd << dendl;
- if (fd < 0) return fd;
+ if (fd < 0) {
+ delete[] buf;
+ return fd;
+ }
utime_t from = ceph_clock_now(g_ceph_context);
utime_t start = from;
@@ -2694,7 +2703,7 @@ int SyntheticClient::random_walk(int num_req)
}
// descend?
- if (.9*roll_die(::pow((double).9,(double)cwd.depth())) && subdirs.size()) {
+ if (.9*roll_die(::pow((double).9,(double)cwd.depth())) && !subdirs.empty()) {
string s = get_random_subdir();
cwd.push_dentry( s );
dout(DBL) << "cd " << s << " -> " << cwd << dendl;
diff --git a/src/client/test_ioctls.c b/src/client/test_ioctls.c
index f510cd26ee6..23fa835c54e 100644
--- a/src/client/test_ioctls.c
+++ b/src/client/test_ioctls.c
@@ -24,7 +24,7 @@ int main(int argc, char **argv)
struct ceph_ioctl_dataloc dl;
if (argc < 3) {
- printf("usage: test_ioctls <filename> <offset>\n");
+ printf("usage: ceph_test_ioctls <filename> <offset>\n");
return 1;
}
fn = argv[1];
diff --git a/src/cls/lock/cls_lock.cc b/src/cls/lock/cls_lock.cc
index 1405d87a1f2..5f27c3cc4b1 100644
--- a/src/cls/lock/cls_lock.cc
+++ b/src/cls/lock/cls_lock.cc
@@ -206,7 +206,7 @@ static int lock_obj(cls_method_context_t hctx,
}
}
- if (lockers.size()) {
+ if (!lockers.empty()) {
if (exclusive) {
CLS_LOG(20, "could not exclusive-lock object, already locked");
return -EBUSY;
diff --git a/src/cls/rbd/cls_rbd.cc b/src/cls/rbd/cls_rbd.cc
index a55be8c7d83..3088f38178b 100644
--- a/src/cls/rbd/cls_rbd.cc
+++ b/src/cls/rbd/cls_rbd.cc
@@ -1116,7 +1116,7 @@ int get_snapcontext(cls_method_context_t hctx, bufferlist *in, bufferlist *out)
snapid_t snap_id = snap_id_from_key(*it);
snap_ids.push_back(snap_id);
}
- if (keys.size() > 0)
+ if (!keys.empty())
last_read = *(keys.rbegin());
} while (r == max_read);
@@ -1269,7 +1269,7 @@ int snapshot_add(cls_method_context_t hctx, bufferlist *in, bufferlist *out)
}
}
- if (vals.size() > 0)
+ if (!vals.empty())
last_read = vals.rbegin()->first;
} while (r == RBD_MAX_KEYS_READ);
@@ -1717,7 +1717,7 @@ int dir_list(cls_method_context_t hctx, bufferlist *in, bufferlist *out)
if (images.size() >= max_return)
break;
}
- if (vals.size() > 0) {
+ if (!vals.empty()) {
last_read = dir_key_for_name(images.rbegin()->first);
}
}
diff --git a/src/cls/refcount/cls_refcount.cc b/src/cls/refcount/cls_refcount.cc
index c924c16e62f..5e8edeb887a 100644
--- a/src/cls/refcount/cls_refcount.cc
+++ b/src/cls/refcount/cls_refcount.cc
@@ -134,7 +134,7 @@ static int cls_rc_refcount_put(cls_method_context_t hctx, bufferlist *in, buffer
if (ret < 0)
return ret;
- if (!objr.refs.size()) {// shouldn't happen!
+ if (objr.refs.empty()) {// shouldn't happen!
CLS_LOG(0, "ERROR: cls_rc_refcount_put() was called without any references!\n");
return -EINVAL;
}
@@ -157,7 +157,7 @@ static int cls_rc_refcount_put(cls_method_context_t hctx, bufferlist *in, buffer
objr.refs.erase(iter);
- if (!objr.refs.size()) {
+ if (objr.refs.empty()) {
return cls_cxx_remove(hctx);
}
diff --git a/src/common/AsyncReserver.h b/src/common/AsyncReserver.h
index 8cc2258d7b4..638bfb3a1b1 100644
--- a/src/common/AsyncReserver.h
+++ b/src/common/AsyncReserver.h
@@ -37,7 +37,7 @@ class AsyncReserver {
void do_queues() {
while (in_progress.size() < max_allowed &&
- queue.size()) {
+ !queue.empty()) {
pair<T, Context*> p = queue.front();
queue_pointers.erase(p.first);
queue.pop_front();
diff --git a/src/common/DecayCounter.cc b/src/common/DecayCounter.cc
new file mode 100644
index 00000000000..67a129ccd09
--- /dev/null
+++ b/src/common/DecayCounter.cc
@@ -0,0 +1,82 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include "DecayCounter.h"
+#include "Formatter.h"
+
+void DecayCounter::encode(bufferlist& bl) const
+{
+ ENCODE_START(4, 4, bl);
+ ::encode(val, bl);
+ ::encode(delta, bl);
+ ::encode(vel, bl);
+ ENCODE_FINISH(bl);
+}
+
+void DecayCounter::decode(const utime_t &t, bufferlist::iterator &p)
+{
+ DECODE_START_LEGACY_COMPAT_LEN(4, 4, 4, p);
+ if (struct_v < 2) {
+ double half_life;
+ ::decode(half_life, p);
+ }
+ if (struct_v < 3) {
+ double k;
+ ::decode(k, p);
+ }
+ ::decode(val, p);
+ ::decode(delta, p);
+ ::decode(vel, p);
+ DECODE_FINISH(p);
+}
+
+void DecayCounter::dump(Formatter *f) const
+{
+ f->dump_float("value", val);
+ f->dump_float("delta", delta);
+ f->dump_float("velocity", vel);
+}
+
+void DecayCounter::generate_test_instances(list<DecayCounter*>& ls)
+{
+ utime_t fake_time;
+ DecayCounter *counter = new DecayCounter(fake_time);
+ counter->val = 3.0;
+ counter->delta = 2.0;
+ counter->vel = 1.0;
+ ls.push_back(counter);
+ counter = new DecayCounter(fake_time);
+ ls.push_back(counter);
+}
+
+void DecayCounter::decay(utime_t now, const DecayRate &rate)
+{
+ utime_t el = now;
+ el -= last_decay;
+
+ if (el.sec() >= 1) {
+ // calculate new value
+ double newval = (val+delta) * exp((double)el * rate.k);
+ if (newval < .01)
+ newval = 0.0;
+
+ // calculate velocity approx
+ vel += (newval - val) * (double)el;
+ vel *= exp((double)el * rate.k);
+
+ val = newval;
+ delta = 0;
+ last_decay = now;
+ }
+}
diff --git a/src/common/DecayCounter.h b/src/common/DecayCounter.h
index fa6f85f49b0..4e69a886963 100644
--- a/src/common/DecayCounter.h
+++ b/src/common/DecayCounter.h
@@ -51,34 +51,24 @@ public:
public:
- void encode(bufferlist& bl) const {
- __u8 struct_v = 3;
- ::encode(struct_v, bl);
- ::encode(val, bl);
- ::encode(delta, bl);
- ::encode(vel, bl);
- }
- void decode(const utime_t &t, bufferlist::iterator &p) {
- __u8 struct_v;
- ::decode(struct_v, p);
- if (struct_v < 2) {
- double half_life;
- ::decode(half_life, p);
- }
- if (struct_v < 3) {
- double k;
- ::decode(k, p);
- }
- ::decode(val, p);
- ::decode(delta, p);
- ::decode(vel, p);
- }
+ void encode(bufferlist& bl) const;
+ void decode(const utime_t &t, bufferlist::iterator& p);
+ void dump(Formatter *f) const;
+ static void generate_test_instances(list<DecayCounter*>& ls);
DecayCounter(const utime_t &now)
: val(0), delta(0), vel(0), last_decay(now)
{
}
+ // these two functions are for the use of our dencoder testing infrastructure
+ DecayCounter() : val(0), delta(0), vel(0), last_decay() {}
+
+ void decode(bufferlist::iterator& p) {
+ utime_t fake_time;
+ decode(fake_time, p);
+ }
+
/**
* reading
*/
@@ -131,25 +121,8 @@ public:
last_decay = now;
val = delta = 0;
}
-
- void decay(utime_t now, const DecayRate &rate) {
- utime_t el = now;
- el -= last_decay;
-
- if (el.sec() >= 1) {
- // calculate new value
- double newval = (val+delta) * exp((double)el * rate.k);
- if (newval < .01) newval = 0.0;
-
- // calculate velocity approx
- vel += (newval - val) * (double)el;
- vel *= exp((double)el * rate.k);
-
- val = newval;
- delta = 0;
- last_decay = now;
- }
- }
+
+ void decay(utime_t now, const DecayRate &rate);
};
inline void encode(const DecayCounter &c, bufferlist &bl) { c.encode(bl); }
diff --git a/src/common/Mutex.cc b/src/common/Mutex.cc
index 235086470a7..f1e9a550c81 100644
--- a/src/common/Mutex.cc
+++ b/src/common/Mutex.cc
@@ -11,7 +11,6 @@
* Foundation. See file COPYING.
*
*/
-using namespace std;
#include <string>
#include "common/Mutex.h"
diff --git a/src/common/Throttle.cc b/src/common/Throttle.cc
index 844263aa111..9b1e5292ec2 100644
--- a/src/common/Throttle.cc
+++ b/src/common/Throttle.cc
@@ -25,13 +25,17 @@ enum {
l_throttle_last,
};
-Throttle::Throttle(CephContext *cct, std::string n, int64_t m)
- : cct(cct), name(n),
+Throttle::Throttle(CephContext *cct, std::string n, int64_t m, bool _use_perf)
+ : cct(cct), name(n), logger(NULL),
max(m),
- lock("Throttle::lock")
+ lock("Throttle::lock"),
+ use_perf(_use_perf)
{
assert(m >= 0);
+ if (!use_perf)
+ return;
+
PerfCountersBuilder b(cct, string("throttle-") + name, l_throttle_first, l_throttle_last);
b.add_u64_counter(l_throttle_val, "val");
b.add_u64_counter(l_throttle_max, "max");
@@ -58,6 +62,9 @@ Throttle::~Throttle()
cond.pop_front();
}
+ if (!use_perf)
+ return;
+
cct->get_perfcounters_collection()->remove(logger);
delete logger;
}
@@ -65,9 +72,10 @@ Throttle::~Throttle()
void Throttle::_reset_max(int64_t m)
{
assert(lock.is_locked());
- if (m < ((int64_t)max.read()) && !cond.empty())
+ if (!cond.empty())
cond.front()->SignalOne();
- logger->set(l_throttle_max, m);
+ if (logger)
+ logger->set(l_throttle_max, m);
max.set((size_t)m);
}
@@ -90,7 +98,8 @@ bool Throttle::_wait(int64_t c)
if (waited) {
ldout(cct, 3) << "_wait finished waiting" << dendl;
utime_t dur = ceph_clock_now(cct) - start;
- logger->tinc(l_throttle_wait, dur);
+ if (logger)
+ logger->tinc(l_throttle_wait, dur);
}
delete cv;
@@ -122,9 +131,11 @@ int64_t Throttle::take(int64_t c)
Mutex::Locker l(lock);
count.add(c);
}
- logger->inc(l_throttle_take);
- logger->inc(l_throttle_take_sum, c);
- logger->set(l_throttle_val, count.read());
+ if (logger) {
+ logger->inc(l_throttle_take);
+ logger->inc(l_throttle_take_sum, c);
+ logger->set(l_throttle_val, count.read());
+ }
return count.read();
}
@@ -142,9 +153,11 @@ bool Throttle::get(int64_t c, int64_t m)
waited = _wait(c);
count.add(c);
}
- logger->inc(l_throttle_get);
- logger->inc(l_throttle_get_sum, c);
- logger->set(l_throttle_val, count.read());
+ if (logger) {
+ logger->inc(l_throttle_get);
+ logger->inc(l_throttle_get_sum, c);
+ logger->set(l_throttle_val, count.read());
+ }
return waited;
}
@@ -157,15 +170,19 @@ bool Throttle::get_or_fail(int64_t c)
Mutex::Locker l(lock);
if (_should_wait(c) || !cond.empty()) {
ldout(cct, 10) << "get_or_fail " << c << " failed" << dendl;
- logger->inc(l_throttle_get_or_fail_fail);
+ if (logger) {
+ logger->inc(l_throttle_get_or_fail_fail);
+ }
return false;
} else {
ldout(cct, 10) << "get_or_fail " << c << " success (" << count.read() << " -> " << (count.read() + c) << ")" << dendl;
count.add(c);
- logger->inc(l_throttle_get_or_fail_success);
- logger->inc(l_throttle_get);
- logger->inc(l_throttle_get_sum, c);
- logger->set(l_throttle_val, count.read());
+ if (logger) {
+ logger->inc(l_throttle_get_or_fail_success);
+ logger->inc(l_throttle_get);
+ logger->inc(l_throttle_get_sum, c);
+ logger->set(l_throttle_val, count.read());
+ }
return true;
}
}
@@ -180,9 +197,11 @@ int64_t Throttle::put(int64_t c)
cond.front()->SignalOne();
assert(((int64_t)count.read()) >= c); //if count goes negative, we failed somewhere!
count.sub(c);
- logger->inc(l_throttle_put);
- logger->inc(l_throttle_put_sum, c);
- logger->set(l_throttle_val, count.read());
+ if (logger) {
+ logger->inc(l_throttle_put);
+ logger->inc(l_throttle_put_sum, c);
+ logger->set(l_throttle_val, count.read());
+ }
}
return count.read();
}
diff --git a/src/common/Throttle.h b/src/common/Throttle.h
index 15964b247a9..a89783fdb77 100644
--- a/src/common/Throttle.h
+++ b/src/common/Throttle.h
@@ -19,9 +19,10 @@ class Throttle {
ceph::atomic_t count, max;
Mutex lock;
list<Cond*> cond;
+ bool use_perf;
public:
- Throttle(CephContext *cct, std::string n, int64_t m = 0);
+ Throttle(CephContext *cct, std::string n, int64_t m = 0, bool _use_perf = true);
~Throttle();
private:
diff --git a/src/common/WorkQueue.cc b/src/common/WorkQueue.cc
index a7efcc02870..66ce6dc2d15 100644
--- a/src/common/WorkQueue.cc
+++ b/src/common/WorkQueue.cc
@@ -99,7 +99,7 @@ void ThreadPool::worker(WorkThread *wt)
break;
}
- if (!_pause && work_queues.size()) {
+ if (!_pause && !work_queues.empty()) {
WorkQueue_* wq;
int tries = work_queues.size();
bool did = false;
diff --git a/src/common/WorkQueue.h b/src/common/WorkQueue.h
index 9fb215b9188..ced952c49cd 100644
--- a/src/common/WorkQueue.h
+++ b/src/common/WorkQueue.h
@@ -94,7 +94,7 @@ public:
void *_void_dequeue() {
list<T*> *out(new list<T*>);
_dequeue(out);
- if (out->size()) {
+ if (!out->empty()) {
return (void *)out;
} else {
delete out;
@@ -251,10 +251,10 @@ public:
return (void *)_dequeue();
}
void _void_process(void *p, TPHandle &handle) {
- _process((T *)p, handle);
+ _process(static_cast<T *>(p), handle);
}
void _void_process_finish(void *p) {
- _process_finish((T *)p);
+ _process_finish(static_cast<T *>(p));
}
public:
diff --git a/src/common/buffer.cc b/src/common/buffer.cc
index b2d3ec6ed8c..df50cfccc42 100644
--- a/src/common/buffer.cc
+++ b/src/common/buffer.cc
@@ -285,14 +285,14 @@ bool buffer_track_alloc = get_env_bool("CEPH_BUFFER_TRACK");
}
buffer::ptr& buffer::ptr::operator= (const ptr& p)
{
- // be careful -- we need to properly handle self-assignment.
if (p._raw) {
- p._raw->nref.inc(); // inc new
+ p._raw->nref.inc();
bdout << "ptr " << this << " get " << _raw << bendl;
}
- release(); // dec (+ dealloc) old (if any)
- if (p._raw) {
- _raw = p._raw;
+ buffer::raw *raw = p._raw;
+ release();
+ if (raw) {
+ _raw = raw;
_off = p._off;
_len = p._len;
} else {
@@ -371,7 +371,7 @@ bool buffer_track_alloc = get_env_bool("CEPH_BUFFER_TRACK");
int l = _len < o._len ? _len : o._len;
if (l) {
int r = memcmp(c_str(), o.c_str(), l);
- if (!r)
+ if (r)
return r;
}
if (_len < o._len)
@@ -736,7 +736,7 @@ bool buffer_track_alloc = get_env_bool("CEPH_BUFFER_TRACK");
it != _buffers.end();
it++) {
if (p + it->length() > o) {
- if (p >= o && p+it->length() >= o+l)
+ if (p >= o && p+it->length() <= o+l)
it->zero(); // all
else if (p >= o)
it->zero(0, o+l-p); // head
@@ -744,7 +744,7 @@ bool buffer_track_alloc = get_env_bool("CEPH_BUFFER_TRACK");
it->zero(o-p, it->length()-(o-p)); // tail
}
p += it->length();
- if (o+l >= p)
+ if (o+l <= p)
break; // done
}
}
diff --git a/src/common/ceph_crypto.cc b/src/common/ceph_crypto.cc
index 3f04349c20b..96fa157c9f5 100644
--- a/src/common/ceph_crypto.cc
+++ b/src/common/ceph_crypto.cc
@@ -20,7 +20,6 @@
#include <pthread.h>
#include <stdlib.h>
-void ceph::crypto::shutdown();
#ifdef USE_CRYPTOPP
void ceph::crypto::init(CephContext *cct)
diff --git a/src/common/config.h b/src/common/config.h
index 9bf04fed8a0..cf397bbe53e 100644
--- a/src/common/config.h
+++ b/src/common/config.h
@@ -33,7 +33,7 @@ extern struct ceph_file_layout g_default_file_layout;
#define OSD_REP_SPLAY 1
#define OSD_REP_CHAIN 2
-class config_option;
+struct config_option;
class CephContext;
extern const char *CEPH_CONF_FILE_DEFAULT;
diff --git a/src/common/config_opts.h b/src/common/config_opts.h
index 5e0449e3606..3963b31aff9 100644
--- a/src/common/config_opts.h
+++ b/src/common/config_opts.h
@@ -100,6 +100,7 @@ OPTION(ms_initial_backoff, OPT_DOUBLE, .2)
OPTION(ms_max_backoff, OPT_DOUBLE, 15.0)
OPTION(ms_nocrc, OPT_BOOL, false)
OPTION(ms_die_on_bad_msg, OPT_BOOL, false)
+OPTION(ms_die_on_unhandled_msg, OPT_BOOL, false)
OPTION(ms_dispatch_throttle_bytes, OPT_U64, 100 << 20)
OPTION(ms_bind_ipv6, OPT_BOOL, false)
OPTION(ms_bind_port_min, OPT_INT, 6800)
@@ -218,6 +219,7 @@ OPTION(mds_dir_max_commit_size, OPT_INT, 90) // MB
OPTION(mds_decay_halflife, OPT_FLOAT, 5)
OPTION(mds_beacon_interval, OPT_FLOAT, 4)
OPTION(mds_beacon_grace, OPT_FLOAT, 15)
+OPTION(mds_enforce_unique_name, OPT_BOOL, true)
OPTION(mds_blacklist_interval, OPT_FLOAT, 24.0*60.0) // how long to blacklist failed nodes
OPTION(mds_session_timeout, OPT_FLOAT, 60) // cap bits and leases time out if client idle
OPTION(mds_session_autoclose, OPT_FLOAT, 300) // autoclose idle session
@@ -276,6 +278,9 @@ OPTION(mds_kill_export_at, OPT_INT, 0)
OPTION(mds_kill_import_at, OPT_INT, 0)
OPTION(mds_kill_link_at, OPT_INT, 0)
OPTION(mds_kill_rename_at, OPT_INT, 0)
+OPTION(mds_inject_traceless_reply_probability, OPT_DOUBLE, 0) /* probability,
+ in the range [0-1], that an MDS modify reply
+ skips sending the client a trace */
OPTION(mds_wipe_sessions, OPT_BOOL, 0)
OPTION(mds_wipe_ino_prealloc, OPT_BOOL, 0)
OPTION(mds_skip_ino, OPT_INT, 0)
@@ -314,8 +319,8 @@ OPTION(osd_max_rep, OPT_INT, 10)
OPTION(osd_pool_default_crush_rule, OPT_INT, 0)
OPTION(osd_pool_default_size, OPT_INT, 2)
OPTION(osd_pool_default_min_size, OPT_INT, 0) // 0 means no specific default; ceph will use size-size/2
-OPTION(osd_pool_default_pg_num, OPT_INT, 8)
-OPTION(osd_pool_default_pgp_num, OPT_INT, 8)
+OPTION(osd_pool_default_pg_num, OPT_INT, 8) // number of PGs for new pools. Configure in global or mon section of ceph.conf
+OPTION(osd_pool_default_pgp_num, OPT_INT, 8) // number of PGs for placement purposes. Should be equal to pg_num
OPTION(osd_map_dedup, OPT_BOOL, true)
OPTION(osd_map_cache_size, OPT_INT, 500)
OPTION(osd_map_message_max, OPT_INT, 100) // max maps per MOSDMap message
@@ -456,6 +461,8 @@ OPTION(rgw_enable_apis, OPT_STR, "s3, swift, swift_auth, admin")
OPTION(rgw_cache_enabled, OPT_BOOL, true) // rgw cache enabled
OPTION(rgw_cache_lru_size, OPT_INT, 10000) // num of entries in rgw cache
OPTION(rgw_socket_path, OPT_STR, "") // path to unix domain socket, if not specified, rgw will not run as external fcgi
+OPTION(rgw_host, OPT_STR, "") // host for radosgw, can be an IP, default is 0.0.0.0
+OPTION(rgw_port, OPT_STR, "") // TCP port to listen on, e.g. "8080" or "5000"; if not specified, rgw will not run as external fcgi
OPTION(rgw_dns_name, OPT_STR, "")
OPTION(rgw_script_uri, OPT_STR, "") // alternative value for SCRIPT_URI if not set in request
OPTION(rgw_request_uri, OPT_STR, "") // alternative value for REQUEST_URI if not set in request
@@ -504,6 +511,8 @@ OPTION(rgw_resolve_cname, OPT_BOOL, false) // should rgw try to resolve hostnam
OPTION(rgw_obj_stripe_size, OPT_INT, 4 << 20)
OPTION(rgw_extended_http_attrs, OPT_STR, "") // list of extended attrs that can be set on objects (beyond the default)
OPTION(rgw_exit_timeout_secs, OPT_INT, 120) // how many seconds to wait for process to go down before exiting unconditionally
+OPTION(rgw_get_obj_window_size, OPT_INT, 16 << 20) // window size in bytes for single get obj request
+OPTION(rgw_get_obj_max_req_size, OPT_INT, 4 << 20) // max length of a single get obj rados op
OPTION(mutex_perf_counter, OPT_BOOL, false) // enable/disable mutex perf counter
diff --git a/src/common/fiemap.cc b/src/common/fiemap.cc
index 0df12d6e8fd..a1d5fbe9396 100644
--- a/src/common/fiemap.cc
+++ b/src/common/fiemap.cc
@@ -40,6 +40,7 @@
struct fiemap *read_fiemap(int fd)
{
struct fiemap *fiemap;
+ struct fiemap *_realloc_fiemap = NULL;
int extents_size;
int r;
@@ -62,18 +63,20 @@ struct fiemap *read_fiemap(int fd)
}
if (!fiemap->fm_mapped_extents) {
- free(fiemap);
- return NULL;
+ goto done_err;
}
/* Read in the extents */
extents_size = sizeof(struct fiemap_extent) * (fiemap->fm_mapped_extents);
/* Resize fiemap to allow us to read in the extents */
- if ((fiemap = (struct fiemap*)realloc(fiemap,sizeof(struct fiemap) +
+
+ if ((_realloc_fiemap = (struct fiemap*)realloc(fiemap,sizeof(struct fiemap) +
extents_size)) == NULL) {
fprintf(stderr, "Out of memory allocating fiemap\n");
goto done_err;
+ } else {
+ fiemap = _realloc_fiemap;
}
memset(fiemap->fm_extents, 0, extents_size);
diff --git a/src/common/obj_bencher.cc b/src/common/obj_bencher.cc
index 74d54e16c90..54ed0db3f92 100644
--- a/src/common/obj_bencher.cc
+++ b/src/common/obj_bencher.cc
@@ -25,6 +25,7 @@
#include <stdlib.h>
#include <time.h>
#include <sstream>
+#include <vector>
const std::string BENCH_LASTRUN_METADATA = "benchmark_last_metadata";
@@ -305,11 +306,11 @@ int ObjBencher::write_bench(int secondsToRun, int concurrentios) {
std::string prefix = generate_object_prefix();
out(cout) << "Object prefix: " << prefix << std::endl;
- std::string name[concurrentios];
+ std::vector<string> name(concurrentios);
std::string newName;
bufferlist* contents[concurrentios];
double total_latency = 0;
- utime_t start_times[concurrentios];
+ std::vector<utime_t> start_times(concurrentios);
utime_t stopTime;
int r = 0;
bufferlist b_write;
@@ -493,13 +494,13 @@ int ObjBencher::write_bench(int secondsToRun, int concurrentios) {
int ObjBencher::seq_read_bench(int seconds_to_run, int num_objects, int concurrentios, int pid) {
lock_cond lc(&lock);
- std::string name[concurrentios];
+ std::vector<string> name(concurrentios);
std::string newName;
bufferlist* contents[concurrentios];
int index[concurrentios];
int errors = 0;
utime_t start_time;
- utime_t start_times[concurrentios];
+ std::vector<utime_t> start_times(concurrentios);
utime_t time_to_run;
time_to_run.set_from_double(seconds_to_run);
double total_latency = 0;
@@ -705,7 +706,7 @@ int ObjBencher::clean_up(const std::string& prefix, int concurrentios) {
int ObjBencher::clean_up(int num_objects, int prevPid, int concurrentios) {
lock_cond lc(&lock);
- std::string name[concurrentios];
+ std::vector<string> name(concurrentios);
std::string newName;
int r = 0;
utime_t runtime;
@@ -845,7 +846,7 @@ bool ObjBencher::more_objects_matching_prefix(const std::string& prefix, std::li
objects->clear();
- while (objects->size() == 0) {
+ while (objects->empty()) {
bool objects_remain = get_objects(&unfiltered_objects, 20);
if (!objects_remain)
return false;
@@ -865,7 +866,7 @@ bool ObjBencher::more_objects_matching_prefix(const std::string& prefix, std::li
int ObjBencher::clean_up_slow(const std::string& prefix, int concurrentios) {
lock_cond lc(&lock);
- std::string name[concurrentios];
+ std::vector<string> name(concurrentios);
std::string newName;
int r = 0;
utime_t runtime;
@@ -888,7 +889,7 @@ int ObjBencher::clean_up_slow(const std::string& prefix, int concurrentios) {
//set up initial removes
for (int i = 0; i < concurrentios; ++i) {
- if (objects.size() == 0) {
+ if (objects.empty()) {
// if there are fewer objects than concurrent ios, don't generate extras
bool objects_found = more_objects_matching_prefix(prefix, &objects);
if (!objects_found) {
@@ -940,7 +941,7 @@ int ObjBencher::clean_up_slow(const std::string& prefix, int concurrentios) {
lock.Unlock();
// get more objects if necessary
- if (objects.size() == 0) {
+ if (objects.empty()) {
objects_remain = more_objects_matching_prefix(prefix, &objects);
// quit if there are no more
if (!objects_remain) {
diff --git a/src/common/types.cc b/src/common/types.cc
deleted file mode 100644
index c5482e10822..00000000000
--- a/src/common/types.cc
+++ /dev/null
@@ -1,23 +0,0 @@
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab
-
-#include "include/types.h"
-#include "common/Formatter.h"
-
-void dump(const ceph_file_layout& l, Formatter *f)
-{
- f->dump_unsigned("stripe_unit", l.fl_stripe_unit);
- f->dump_unsigned("stripe_count", l.fl_stripe_count);
- f->dump_unsigned("object_size", l.fl_object_size);
- if (l.fl_cas_hash)
- f->dump_unsigned("cas_hash", l.fl_cas_hash);
- if (l.fl_object_stripe_unit)
- f->dump_unsigned("object_stripe_unit", l.fl_object_stripe_unit);
- if (l.fl_pg_pool)
- f->dump_unsigned("pg_pool", l.fl_pg_pool);
-}
-
-void dump(const ceph_dir_layout& dl, Formatter *f)
-{
- f->dump_unsigned("dir_hash", dl.dl_dir_hash);
-}
diff --git a/src/crush/CrushWrapper.cc b/src/crush/CrushWrapper.cc
index 45e4fb53de6..53e3c1cc649 100644
--- a/src/crush/CrushWrapper.cc
+++ b/src/crush/CrushWrapper.cc
@@ -349,7 +349,7 @@ int CrushWrapper::create_or_move_item(CephContext *cct, int item, float weight,
}
ldout(cct, 5) << "create_or_move_item adding " << item << " weight " << weight
<< " at " << loc << dendl;
- ret = insert_item(cct, item, weight, name.c_str(), loc);
+ ret = insert_item(cct, item, weight, name, loc);
if (ret == 0)
ret = 1; // changed
}
@@ -385,7 +385,7 @@ int CrushWrapper::update_item(CephContext *cct, int item, float weight, string n
}
ldout(cct, 5) << "update_item adding " << item << " weight " << weight
<< " at " << loc << dendl;
- ret = insert_item(cct, item, weight, name.c_str(), loc);
+ ret = insert_item(cct, item, weight, name, loc);
if (ret == 0)
ret = 1; // changed
}
@@ -488,6 +488,61 @@ void CrushWrapper::reweight(CephContext *cct)
}
}
+/**
+ * Create a three-step rule (take / choose / emit) and register it by name.
+ *
+ * @param name                 name for the new rule; must not already exist
+ * @param root_name            name of the item the rule takes as its root
+ * @param failure_domain_name  type name to choose leaves across; when empty
+ *                             the rule uses CHOOSE_FIRSTN with type 0 instead
+ * @return the new rule number on success; -EEXIST if a rule called name
+ *         already exists, -ENOENT if root_name is unknown, -EINVAL if
+ *         failure_domain_name does not map to a type id > 0, or the negative
+ *         value returned by crush_add_rule()
+ */
+int CrushWrapper::add_simple_rule(string name, string root_name, string failure_domain_name)
+{
+  if (rule_exists(name))
+    return -EEXIST;
+  if (!name_exists(root_name.c_str()))
+    return -ENOENT;
+  int root = get_item_id(root_name.c_str());
+  int type = 0;
+  if (failure_domain_name.length()) {
+    type = get_type_id(failure_domain_name.c_str());
+    if (type <= 0) // bah, returns 0 on error; but its ok, device isn't a domain really
+      return -EINVAL;
+  }
+
+  // pick a ruleset id one past the highest one used by any existing rule
+  int ruleset = 0;
+  for (int i = 0; i < get_max_rules(); i++) {
+    if (rule_exists(i) &&
+        get_rule_mask_ruleset(i) >= ruleset) {
+      ruleset = get_rule_mask_ruleset(i) + 1;
+    }
+  }
+
+  // NOTE(review): the trailing 1, 10 are presumably the rule's min and max
+  // size -- confirm against crush_make_rule()
+  crush_rule *rule = crush_make_rule(3, ruleset, 1 /* pg_pool_t::TYPE_REP */, 1, 10);
+  assert(rule);
+  crush_rule_set_step(rule, 0, CRUSH_RULE_TAKE, root, 0);
+  if (type)
+    crush_rule_set_step(rule, 1,
+                        CRUSH_RULE_CHOOSE_LEAF_FIRSTN,
+                        CRUSH_CHOOSE_N,
+                        type);
+  else
+    crush_rule_set_step(rule, 1,
+                        CRUSH_RULE_CHOOSE_FIRSTN,
+                        CRUSH_CHOOSE_N,
+                        0);
+  crush_rule_set_step(rule, 2, CRUSH_RULE_EMIT, 0, 0);
+  int rno = crush_add_rule(crush, rule, -1);
+  if (rno < 0) {
+    // Do not register a name under an error code.
+    // NOTE(review): rule is probably still owned by us here and leaks;
+    // confirm crush_add_rule()'s ownership on failure before freeing it.
+    return rno;
+  }
+  set_rule_name(rno, name.c_str());
+  have_rmaps = false;
+  return rno;
+}
+
+/**
+ * Delete a rule from the map and drop its entry from rule_name_map.
+ *
+ * @param ruleno  number of the rule to remove
+ * @return 0 on success; -ENOENT if ruleno is outside [0, max_rules) or that
+ *         slot holds no rule
+ */
+int CrushWrapper::remove_rule(int ruleno)
+{
+  // A negative id (for instance an error code passed through by a caller)
+  // would otherwise index before the start of crush->rules.
+  if (ruleno < 0 || ruleno >= (int)crush->max_rules)
+    return -ENOENT;
+  if (crush->rules[ruleno] == NULL)
+    return -ENOENT;
+  crush_destroy_rule(crush->rules[ruleno]);
+  crush->rules[ruleno] = NULL;
+  rule_name_map.erase(ruleno);
+  have_rmaps = false;
+  return 0;
+}
+
void CrushWrapper::encode(bufferlist& bl, bool lean) const
{
assert(crush);
@@ -817,6 +872,12 @@ void CrushWrapper::dump(Formatter *f) const
f->close_section();
f->open_array_section("rules");
+ dump_rules(f);
+ f->close_section();
+}
+
+void CrushWrapper::dump_rules(Formatter *f) const
+{
for (int i=0; i<get_max_rules(); i++) {
if (!rule_exists(i))
continue;
@@ -872,7 +933,15 @@ void CrushWrapper::dump(Formatter *f) const
f->close_section();
f->close_section();
}
- f->close_section();
+}
+
+// Emit the name of every existing rule to the formatter, each one as a
+// string under the key "name", in increasing rule-number order.
+void CrushWrapper::list_rules(Formatter *f) const
+{
+  for (int ruleno = 0; ruleno < get_max_rules(); ++ruleno) {
+    if (rule_exists(ruleno))
+      f->dump_string("name", get_rule_name(ruleno));
+  }
}
void CrushWrapper::generate_test_instances(list<CrushWrapper*>& o)
diff --git a/src/crush/CrushWrapper.h b/src/crush/CrushWrapper.h
index 7def6e4ab34..0b919cba3ec 100644
--- a/src/crush/CrushWrapper.h
+++ b/src/crush/CrushWrapper.h
@@ -221,12 +221,15 @@ public:
}
// rule names
- int get_rule_id(const char *n) {
- string name(n);
+  // true if rule_name_rmap has an entry for this rule name
+  bool rule_exists(string name) {
+    build_rmaps();
+    return rule_name_rmap.count(name) != 0;
+  }
+ int get_rule_id(string name) {
build_rmaps();
if (rule_name_rmap.count(name))
return rule_name_rmap[name];
- return 0; /* hrm */
+ return -ENOENT;
}
const char *get_rule_name(int t) const {
std::map<int,string>::const_iterator p = rule_name_map.find(t);
@@ -527,6 +530,9 @@ public:
return set_rule_step(ruleno, step, CRUSH_RULE_EMIT, 0, 0);
}
+ int add_simple_rule(string name, string root_name, string failure_domain_type);
+
+ int remove_rule(int ruleno);
/** buckets **/
@@ -735,6 +741,8 @@ public:
void decode(bufferlist::iterator &blp);
void decode_crush_bucket(crush_bucket** bptr, bufferlist::iterator &blp);
void dump(Formatter *f) const;
+ void dump_rules(Formatter *f) const;
+ void list_rules(Formatter *f) const;
static void generate_test_instances(list<CrushWrapper*>& o);
};
WRITE_CLASS_ENCODER(CrushWrapper)
diff --git a/src/crush/crush.c b/src/crush/crush.c
index 19a765228e9..1e83eb866bb 100644
--- a/src/crush/crush.c
+++ b/src/crush/crush.c
@@ -116,7 +116,7 @@ void crush_destroy(struct crush_map *map)
if (map->rules) {
__u32 b;
for (b = 0; b < map->max_rules; b++)
- kfree(map->rules[b]);
+ crush_destroy_rule(map->rules[b]);
kfree(map->rules);
}
@@ -124,6 +124,11 @@ void crush_destroy(struct crush_map *map)
kfree(map);
}
+/* Release a rule; the pointer is simply handed to kfree(). */
+void crush_destroy_rule(struct crush_rule *rule)
+{
+ kfree(rule);
+}
+
// methods to check for safe arithmetic operations
int crush_addition_is_unsafe(__u32 a, __u32 b)
{
diff --git a/src/crush/crush.h b/src/crush/crush.h
index 9fd37e9e516..82d032879d9 100644
--- a/src/crush/crush.h
+++ b/src/crush/crush.h
@@ -185,6 +185,7 @@ extern void crush_destroy_bucket_list(struct crush_bucket_list *b);
extern void crush_destroy_bucket_tree(struct crush_bucket_tree *b);
extern void crush_destroy_bucket_straw(struct crush_bucket_straw *b);
extern void crush_destroy_bucket(struct crush_bucket *b);
+extern void crush_destroy_rule(struct crush_rule *r);
extern void crush_destroy(struct crush_map *map);
static inline int crush_calc_tree_node(int i)
diff --git a/src/crushtool.cc b/src/crushtool.cc
index 3cbd915a321..358a07aad43 100644
--- a/src/crushtool.cc
+++ b/src/crushtool.cc
@@ -427,7 +427,7 @@ int main(int argc, const char **argv)
cout << "no action specified; -h for help" << std::endl;
exit(EXIT_FAILURE);
}
- if ((!build) && (args.size() > 0)) {
+ if ((!build) && (!args.empty())) {
cerr << "unrecognized arguments: " << args << std::endl;
exit(EXIT_FAILURE);
}
diff --git a/src/dupstore.cc b/src/dupstore.cc
index 33269028342..c7f5319e5ff 100644
--- a/src/dupstore.cc
+++ b/src/dupstore.cc
@@ -77,7 +77,7 @@ int dupstore(ObjectStore* src, ObjectStore* dst)
void usage()
{
- cerr << "usage: dupstore filestore SRC filestore DST" << std::endl;
+ cerr << "usage: ceph_dupstore filestore SRC filestore DST" << std::endl;
exit(0);
}
diff --git a/src/gtest/.gitignore b/src/gtest/.gitignore
new file mode 100644
index 00000000000..5dc4299f8fe
--- /dev/null
+++ b/src/gtest/.gitignore
@@ -0,0 +1,5 @@
+fused-src
+/scripts/gtest-config
+/build-aux/config.h.in
+/build-aux/config.h
+/lib/
diff --git a/src/include/buffer.h b/src/include/buffer.h
index 9a635bdb5d0..b84e7f4746a 100644
--- a/src/include/buffer.h
+++ b/src/include/buffer.h
@@ -248,7 +248,7 @@ public:
p(other.p),
p_off(other.p_off) {}
- iterator operator=(const iterator& other) {
+ iterator& operator=(const iterator& other) {
if (this != &other) {
bl = other.bl;
ls = other.ls;
@@ -305,8 +305,10 @@ public:
list(const list& other) : _buffers(other._buffers), _len(other._len), last_p(this) { }
list& operator= (const list& other) {
- _buffers = other._buffers;
- _len = other._len;
+ if (this != &other) {
+ _buffers = other._buffers;
+ _len = other._len;
+ }
return *this;
}
@@ -465,6 +467,7 @@ inline bool operator>=(bufferlist& l, bufferlist& r) {
for (unsigned p = 0; ; p++) {
if (l.length() > p && r.length() == p) return true;
if (r.length() == p && l.length() == p) return true;
+ if (l.length() == p && r.length() > p) return false;
if (l[p] > r[p]) return true;
if (l[p] < r[p]) return false;
}
diff --git a/src/include/ceph_features.h b/src/include/ceph_features.h
index cf9c8d8c27f..c9ff72c15f9 100644
--- a/src/include/ceph_features.h
+++ b/src/include/ceph_features.h
@@ -32,7 +32,8 @@
#define CEPH_FEATURE_CRUSH_TUNABLES2 (1<<25)
#define CEPH_FEATURE_CREATEPOOLID (1<<26)
#define CEPH_FEATURE_REPLY_CREATE_INODE (1<<27)
-#define CEPH_FEATURE_OSD_HBMSGS (1<<28)
+#define CEPH_FEATURE_OSD_HBMSGS (1<<28)
+#define CEPH_FEATURE_MDSENC (1<<29)
/*
* Features supported. Should be everything above.
@@ -66,7 +67,8 @@
CEPH_FEATURE_CRUSH_TUNABLES2 | \
CEPH_FEATURE_CREATEPOOLID | \
CEPH_FEATURE_REPLY_CREATE_INODE | \
- CEPH_FEATURE_OSD_HBMSGS)
+ CEPH_FEATURE_OSD_HBMSGS | \
+ CEPH_FEATURE_MDSENC)
#define CEPH_FEATURES_SUPPORTED_DEFAULT CEPH_FEATURES_ALL
diff --git a/src/include/frag.h b/src/include/frag.h
index a609833db9a..715eb098283 100644
--- a/src/include/frag.h
+++ b/src/include/frag.h
@@ -510,7 +510,7 @@ inline bool operator!=(const fragtree_t& l, const fragtree_t& r) {
return l._splits != r._splits;
}
-inline std::ostream& operator<<(std::ostream& out, fragtree_t& ft)
+inline std::ostream& operator<<(std::ostream& out, const fragtree_t& ft)
{
out << "fragtree_t(";
diff --git a/src/include/types.h b/src/include/types.h
index c783b6e93ce..dff47ac2b98 100644
--- a/src/include/types.h
+++ b/src/include/types.h
@@ -120,7 +120,7 @@ namespace __gnu_cxx {
// -- io helpers --
template<class A, class B>
-inline ostream& operator<<(ostream& out, const pair<A,B> v) {
+inline ostream& operator<<(ostream& out, const pair<A,B>& v) {
return out << v.first << "," << v.second;
}
diff --git a/src/include/xlist.h b/src/include/xlist.h
index 5c2bf03f856..5384561327a 100644
--- a/src/include/xlist.h
+++ b/src/include/xlist.h
@@ -132,8 +132,8 @@ public:
assert((bool)_front == (bool)_size);
}
- T front() { return (T)_front->_item; }
- T back() { return (T)_back->_item; }
+ T front() { return static_cast<T>(_front->_item); }
+ T back() { return static_cast<T>(_back->_item); }
void pop_front() {
assert(!empty());
@@ -149,7 +149,7 @@ public:
item *cur;
public:
iterator(item *i = 0) : cur(i) {}
- T operator*() { return (T)cur->_item; }
+ T operator*() { return static_cast<T>(cur->_item); }
iterator& operator++() {
assert(cur);
assert(cur->_list);
diff --git a/src/init-ceph.in b/src/init-ceph.in
index f7b85b131e8..5c8c951c66e 100644
--- a/src/init-ceph.in
+++ b/src/init-ceph.in
@@ -165,6 +165,7 @@ verify_conf
command=$1
[ -n "$*" ] && shift
+get_local_name_list "$@"
get_name_list "$@"
for name in $what; do
@@ -179,15 +180,16 @@ for name in $what; do
cmd="$binary -i $id"
get_conf pid_file "$RUN_DIR/$type.$id.pid" "pid file"
- if [ -n "$pid_file" ]; then
- do_cmd "mkdir -p "`dirname $pid_file`
- cmd="$cmd --pid-file $pid_file"
- fi
-
- get_conf log_dir "" "log dir"
- [ -n "$log_dir" ] && do_cmd "mkdir -p $log_dir"
if [ "$command" = "start" ]; then
+ if [ -n "$pid_file" ]; then
+ do_cmd "mkdir -p "`dirname $pid_file`
+ cmd="$cmd --pid-file $pid_file"
+ fi
+
+ get_conf log_dir "" "log dir"
+ [ -n "$log_dir" ] && do_cmd "mkdir -p $log_dir"
+
get_conf auto_start "" "auto start"
if [ "$auto_start" = "no" ] || [ "$auto_start" = "false" ] || [ "$auto_start" = "0" ]; then
if [ -z "$@" ]; then
@@ -332,7 +334,7 @@ for name in $what; do
status)
if daemon_is_running $name ceph-$type $id $pid_file; then
- get_conf asok "/var/run/ceph/ceph-$type.$id.asok" "admin socket"
+ get_conf asok "$RUN_DIR/ceph/ceph-$type.$id.asok" "admin socket"
echo -n "$name: running "
do_cmd "$BINDIR/ceph --admin-daemon $asok version 2>/dev/null" || echo unknown
elif [ -e "$pid_file" ]; then
diff --git a/src/key_value_store/cls_kvs.cc b/src/key_value_store/cls_kvs.cc
index fad46f0ca60..8c70a63f54b 100644
--- a/src/key_value_store/cls_kvs.cc
+++ b/src/key_value_store/cls_kvs.cc
@@ -56,7 +56,7 @@ static int get_idata_from_key(cls_method_context_t hctx, const string &key,
CLS_LOG(20, "%s is already in the index: %d", key.c_str(), r);
bufferlist::iterator b = raw_val.begin();
idata.decode(b);
- if (kvmap.size() != 0) {
+ if (!kvmap.empty()) {
bufferlist::iterator b = kvmap.begin()->second.begin();
next_idata.decode(b);
}
@@ -120,7 +120,7 @@ static int get_next_idata(cls_method_context_t hctx, const index_data &idata,
return r;
}
- if (kvs.size() > 0) {
+ if (!kvs.empty()) {
out_data.kdata.parse(kvs.begin()->first);
bufferlist::iterator b = kvs.begin()->second.begin();
out_data.decode(b);
diff --git a/src/key_value_store/kv_flat_btree_async.cc b/src/key_value_store/kv_flat_btree_async.cc
index 96c6cb08e96..ac274379037 100644
--- a/src/key_value_store/kv_flat_btree_async.cc
+++ b/src/key_value_store/kv_flat_btree_async.cc
@@ -189,7 +189,7 @@ int KvFlatBtreeAsync::next(const index_data &idata, index_data * out_data)
<< err << std::endl;
return err;
}
- if (kvs.size() > 0) {
+ if (!kvs.empty()) {
out_data->kdata.parse(kvs.begin()->first);
bufferlist::iterator b = kvs.begin()->second.begin();
out_data->decode(b);
@@ -1959,7 +1959,7 @@ int KvFlatBtreeAsync::remove_all() {
return err;
}
- if (index_set.size() != 0) {
+ if (!index_set.empty()) {
for (std::map<std::string,bufferlist>::iterator it = index_set.begin();
it != index_set.end(); ++it){
librados::ObjectWriteOperation sub;
@@ -2179,7 +2179,7 @@ string KvFlatBtreeAsync::str() {
if (verbose) cout << "getting keys failed with error " << err << std::endl;
return ret.str();
}
- if(index.size() == 0) {
+ if(index.empty()) {
ret << "There are no objects!" << std::endl;
return ret.str();
}
diff --git a/src/libcephfs.cc b/src/libcephfs.cc
index 6f3c04a6d0a..75937586cb0 100644
--- a/src/libcephfs.cc
+++ b/src/libcephfs.cc
@@ -37,6 +37,7 @@ public:
ceph_mount_info(uint64_t msgr_nonce_, CephContext *cct_)
: msgr_nonce(msgr_nonce_),
mounted(false),
+ inited(false),
client(NULL),
monclient(NULL),
messenger(NULL),
@@ -95,6 +96,8 @@ public:
if (ret)
goto fail;
+ inited = true;
+
ret = client->mount(mount_root);
if (ret)
goto fail;
@@ -121,8 +124,9 @@ public:
client->unmount();
mounted = false;
}
- if (client) {
+ if (inited) {
client->shutdown();
+ inited = false;
}
if (messenger) {
messenger->shutdown();
@@ -201,6 +205,7 @@ public:
private:
uint64_t msgr_nonce;
bool mounted;
+ bool inited;
Client *client;
MonClient *monclient;
Messenger *messenger;
diff --git a/src/librados/librados.cc b/src/librados/librados.cc
index 0ac6eb4a788..5a81a267f2b 100644
--- a/src/librados/librados.cc
+++ b/src/librados/librados.cc
@@ -12,8 +12,6 @@
*
*/
-using namespace std;
-
#include "common/config.h"
#include "common/errno.h"
#include "common/ceph_argparse.h"
@@ -27,6 +25,20 @@ using namespace std;
#include "librados/PoolAsyncCompletionImpl.h"
#include "librados/RadosClient.h"
+#include <string>
+#include <map>
+#include <set>
+#include <vector>
+#include <list>
+#include <stdexcept>
+
+using std::string;
+using std::map;
+using std::set;
+using std::vector;
+using std::list;
+using std::runtime_error;
+
#define dout_subsys ceph_subsys_rados
#undef dout_prefix
#define dout_prefix *_dout << "librados: "
diff --git a/src/librbd/internal.cc b/src/librbd/internal.cc
index fdadf6f6753..33b948d2310 100644
--- a/src/librbd/internal.cc
+++ b/src/librbd/internal.cc
@@ -360,7 +360,7 @@ namespace librbd {
it != images.end(); ++it) {
names.push_back(it->first);
}
- if (images.size()) {
+ if (!images.empty()) {
last_read = images.rbegin()->first;
}
r = images.size();
@@ -1056,7 +1056,7 @@ reprotect_and_return_err:
return r;
}
omap_values.insert(outbl.begin(), outbl.end());
- if (outbl.size() > 0)
+ if (!outbl.empty())
last_read = outbl.rbegin()->first;
} while (r == MAX_READ);
@@ -1074,7 +1074,7 @@ reprotect_and_return_err:
librados::ObjectWriteOperation op;
op.create(true);
op.write_full(databl);
- if (omap_values.size())
+ if (!omap_values.empty())
op.omap_set(omap_values);
r = io_ctx.operate(dst_oid, &op);
if (r < 0) {
diff --git a/src/log/Entry.h b/src/log/Entry.h
index 7f6b1499f9d..7cdf11612ac 100644
--- a/src/log/Entry.h
+++ b/src/log/Entry.h
@@ -40,7 +40,7 @@ struct Entry {
}
}
- void set_str(const std::string s) {
+ void set_str(const std::string &s) {
ostream os(&m_streambuf);
os << s;
}
diff --git a/src/logrotate.conf b/src/logrotate.conf
index 9af310413d9..e49285a9f50 100644
--- a/src/logrotate.conf
+++ b/src/logrotate.conf
@@ -4,22 +4,20 @@
compress
sharedscripts
postrotate
- if which invoke-rc.d > /dev/null && [ -x `which invoke-rc.d` ]; then
+ if which invoke-rc.d > /dev/null 2>&1 && [ -x `which invoke-rc.d` ]; then
invoke-rc.d ceph reload >/dev/null
- elif which service > /dev/null && [ -x `which service` ]; then
+ elif which service > /dev/null 2>&1 && [ -x `which service` ]; then
service ceph reload >/dev/null
fi
# Possibly reload twice, but depending on ceph.conf the reload above may be a no-op
- if which initctl > /dev/null && [ -x `which initctl` ]; then
+ if which initctl > /dev/null 2>&1 && [ -x `which initctl` ]; then
# upstart reload isn't very helpful here:
# https://bugs.launchpad.net/upstart/+bug/1012938
- for type in mon osd mds; do
- initctl list \
- | perl -ne 'print "$+{service} cluster=$+{cluster} id=$+{id}\n" if m{^(?<service>ceph-(mon|osd|mds)+)\s+\((?<cluster>[^/)]+)/(?<id>[^)]+)\) start/}' \
- | while read l; do
- initctl reload -- $l 2>/dev/null || :
- done
- done
+ initctl list \
+ | sed -n 's/^\(ceph-\(mon\|osd\|mds\)\+\)[ \t]\+(\([^ \/]\+\)\/\([^ \/]\+\))[ \t]\+start\/.*$/\1 cluster=\3 id=\4/p' \
+ | while read l; do
+ initctl reload -- $l 2>/dev/null || :
+ done
fi
endscript
missingok
diff --git a/src/mds/Anchor.cc b/src/mds/Anchor.cc
new file mode 100644
index 00000000000..e24c5f1e024
--- /dev/null
+++ b/src/mds/Anchor.cc
@@ -0,0 +1,64 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include "mds/Anchor.h"
+
+#include "common/Formatter.h"
+
+// Serialize this anchor into bl.  The fields are appended in a fixed order
+// (ino, dirino, dn_hash, nref, updated) between ENCODE_START and
+// ENCODE_FINISH; Anchor::decode() reads them back in the same order.
+void Anchor::encode(bufferlist &bl) const
+{
+ // NOTE(review): the two 2s are presumably the struct version and the oldest
+ // compatible version -- confirm against the ENCODE_START definition
+ ENCODE_START(2, 2, bl);
+ ::encode(ino, bl);
+ ::encode(dirino, bl);
+ ::encode(dn_hash, bl);
+ ::encode(nref, bl);
+ ::encode(updated, bl);
+ ENCODE_FINISH(bl);
+}
+
+// Fill this anchor from the iterator.  Fields are read in the same order
+// Anchor::encode() writes them: ino, dirino, dn_hash, nref, updated.
+void Anchor::decode(bufferlist::iterator &bl)
+{
+ // NOTE(review): the LEGACY_COMPAT_LEN variant presumably also accepts the
+ // older layout that began with a bare one-byte struct_v -- confirm against
+ // the macro definition
+ DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl);
+ ::decode(ino, bl);
+ ::decode(dirino, bl);
+ ::decode(dn_hash, bl);
+ ::decode(nref, bl);
+ ::decode(updated, bl);
+ DECODE_FINISH(bl);
+}
+
+// Write every field of the anchor to the formatter as an unsigned value.
+// The keys match the member names except nref, which is emitted as "num_ref".
+void Anchor::dump(Formatter *f) const
+{
+ f->dump_unsigned("ino", ino);
+ f->dump_unsigned("dirino", dirino);
+ f->dump_unsigned("dn_hash", dn_hash);
+ f->dump_unsigned("num_ref", nref);
+ f->dump_unsigned("updated", updated);
+}
+
+// Append two heap-allocated sample anchors to ls: a default-constructed one,
+// followed by one whose five fields are set to the distinct values 1..5.
+void Anchor::generate_test_instances(list<Anchor*>& ls)
+{
+  ls.push_back(new Anchor);
+
+  Anchor *populated = new Anchor;
+  populated->ino = 1;
+  populated->dirino = 2;
+  populated->dn_hash = 3;
+  populated->nref = 4;
+  populated->updated = 5;
+  ls.push_back(populated);
+}
+
+// Print an anchor as "a(<ino> <dirino>/<dn_hash> <nref> v<updated>)".
+ostream& operator<<(ostream& out, const Anchor &a)
+{
+  out << "a(" << a.ino << " " << a.dirino << "/" << a.dn_hash;
+  out << " " << a.nref << " v" << a.updated << ")";
+  return out;
+}
diff --git a/src/mds/Anchor.h b/src/mds/Anchor.h
index 52e71f9ea68..e8a6a645214 100644
--- a/src/mds/Anchor.h
+++ b/src/mds/Anchor.h
@@ -25,7 +25,9 @@ using std::string;
// identifies a anchor table mutation
-
+namespace ceph {
+ class Formatter;
+}
// anchor type
@@ -41,30 +43,13 @@ public:
Anchor(inodeno_t i, inodeno_t di, __u32 hash, int nr, version_t u) :
ino(i), dirino(di), dn_hash(hash), nref(nr), updated(u) { }
- void encode(bufferlist &bl) const {
- __u8 struct_v = 1;
- ::encode(struct_v, bl);
- ::encode(ino, bl);
- ::encode(dirino, bl);
- ::encode(dn_hash, bl);
- ::encode(nref, bl);
- ::encode(updated, bl);
- }
- void decode(bufferlist::iterator &bl) {
- __u8 struct_v;
- ::decode(struct_v, bl);
- ::decode(ino, bl);
- ::decode(dirino, bl);
- ::decode(dn_hash, bl);
- ::decode(nref, bl);
- ::decode(updated, bl);
- }
+ void encode(bufferlist &bl) const;
+ void decode(bufferlist::iterator &bl);
+ void dump(Formatter *f) const;
+ static void generate_test_instances(list<Anchor*>& ls);
};
WRITE_CLASS_ENCODER(Anchor)
-inline ostream& operator<<(ostream& out, const Anchor &a)
-{
- return out << "a(" << a.ino << " " << a.dirino << "/" << a.dn_hash << " " << a.nref << " v" << a.updated << ")";
-}
+ostream& operator<<(ostream& out, const Anchor &a);
#endif
diff --git a/src/mds/AnchorServer.cc b/src/mds/AnchorServer.cc
index 4b1bf64f9fa..e980bd4fac9 100644
--- a/src/mds/AnchorServer.cc
+++ b/src/mds/AnchorServer.cc
@@ -41,6 +41,30 @@ void AnchorServer::dump()
dout(15) << "dump " << it->second << dendl;
}
+// Dump anchor_map as an array section named "anchor map".  Each element is an
+// "entry" object holding the map key under "ino" and the anchor's own fields
+// in a nested "Anchor" object.
+void AnchorServer::dump(Formatter *f) const
+{
+  f->open_array_section("anchor map");
+  map<inodeno_t, Anchor>::const_iterator p = anchor_map.begin();
+  const map<inodeno_t, Anchor>::const_iterator last = anchor_map.end();
+  while (p != last) {
+    f->open_object_section("entry");
+    f->dump_int("ino", p->first);
+    f->open_object_section("Anchor");
+    p->second.dump(f);
+    f->close_section(); // Anchor
+    f->close_section(); // entry
+    ++p;
+  }
+  f->close_section(); // anchor map
+}
+
+// Append one heap-allocated sample server to ls, with a single entry at key 0
+// in each of anchor_map, pending_create and pending_destroy.
+void AnchorServer::generate_test_instances(list<AnchorServer*>& ls)
+{
+  AnchorServer *server = new AnchorServer();
+  server->anchor_map[0] = Anchor();
+  server->pending_create[0] = 0;
+  server->pending_destroy[0] = 1;
+  ls.push_back(server);
+}
+
/*
diff --git a/src/mds/AnchorServer.h b/src/mds/AnchorServer.h
index 50a848e3335..b82c72e2e70 100644
--- a/src/mds/AnchorServer.h
+++ b/src/mds/AnchorServer.h
@@ -34,20 +34,20 @@ class AnchorServer : public MDSTableServer {
void reset_state();
void encode_server_state(bufferlist& bl) {
- __u8 v = 1;
- ::encode(v, bl);
+ ENCODE_START(2, 2, bl);
::encode(anchor_map, bl);
::encode(pending_create, bl);
::encode(pending_destroy, bl);
::encode(pending_update, bl);
+ ENCODE_FINISH(bl);
}
void decode_server_state(bufferlist::iterator& p) {
- __u8 v;
- ::decode(v, p);
+ DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, p);
::decode(anchor_map, p);
::decode(pending_create, p);
::decode(pending_destroy, p);
::decode(pending_update, p);
+ DECODE_FINISH(p);
map<version_t, inodeno_t> sort;
sort.insert(pending_create.begin(), pending_create.end());
@@ -65,6 +65,15 @@ class AnchorServer : public MDSTableServer {
bool check_pending(version_t tid, MMDSTableRequest *req, list<Context *>& finished);
void dump();
+ void dump(Formatter *f) const;
+ static void generate_test_instances(list<AnchorServer*>& ls);
+ // for the dencoder
+ AnchorServer() : MDSTableServer(NULL, TABLE_ANCHOR) {}
+  // encode_server_state() is not declared const, so constness is cast away
+  // from this in order to call it from a const encode()
+  void encode(bufferlist& bl) const {
+    const_cast<AnchorServer*>(this)->encode_server_state(bl);
+  }
+ void decode(bufferlist::iterator& bl) { decode_server_state(bl); }
// server bits
void _prepare(bufferlist &bl, uint64_t reqid, int bymds);
diff --git a/src/mds/CDentry.h b/src/mds/CDentry.h
index aa10bf97118..d07ef066acd 100644
--- a/src/mds/CDentry.h
+++ b/src/mds/CDentry.h
@@ -231,7 +231,7 @@ public:
bool is_projected() { return projected.size(); }
linkage_t *get_projected_linkage() {
- if (projected.size())
+ if (!projected.empty())
return &projected.back();
return &linkage;
}
diff --git a/src/mds/CDir.cc b/src/mds/CDir.cc
index 22cdf48b5af..a1ed05cea26 100644
--- a/src/mds/CDir.cc
+++ b/src/mds/CDir.cc
@@ -1175,7 +1175,7 @@ void CDir::add_waiter(uint64_t tag, Context *c)
/* NOTE: this checks dentry waiters too */
void CDir::take_waiting(uint64_t mask, list<Context*>& ls)
{
- if ((mask & WAIT_DENTRY) && waiting_on_dentry.size()) {
+ if ((mask & WAIT_DENTRY) && !waiting_on_dentry.empty()) {
// take all dentry waiters
while (!waiting_on_dentry.empty()) {
map<string_snap_t, list<Context*> >::iterator p = waiting_on_dentry.begin();
diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc
index 33b4bfd2340..b2b1faf3475 100644
--- a/src/mds/CInode.cc
+++ b/src/mds/CInode.cc
@@ -315,7 +315,7 @@ inode_t *CInode::project_inode(map<string,bufferptr> *px)
*px = xattrs;
projected_nodes.back()->dir_layout = default_layout;
} else {
- default_file_layout *last_dl = projected_nodes.back()->dir_layout;
+ file_layout_policy_t *last_dl = projected_nodes.back()->dir_layout;
projected_nodes.push_back(new projected_inode_t(
new inode_t(*projected_nodes.back()->inode)));
if (px)
@@ -760,7 +760,7 @@ void CInode::make_path_string_projected(string& s)
{
make_path_string(s);
- if (projected_parent.size()) {
+ if (!projected_parent.empty()) {
string q;
q.swap(s);
s = "{" + q;
@@ -805,7 +805,7 @@ void CInode::name_stray_dentry(string& dname)
version_t CInode::pre_dirty()
{
version_t pv;
- if (parent || projected_parent.size()) {
+ if (parent || !projected_parent.empty()) {
pv = get_projected_parent_dn()->pre_dirty(get_projected_version());
dout(10) << "pre_dirty " << pv << " (current v " << inode.version << ")" << dendl;
} else {
@@ -1059,6 +1059,48 @@ void CInode::_stored_parent(version_t v, Context *fin)
}
}
+void CInode::encode_store(bufferlist& bl) // serialize this inode's stored state into bl
+{
+ ENCODE_START(3, 3, bl); // versioned header (args 3, 3); the inline version this replaces wrote a bare struct_v = 2
+ ::encode(inode, bl); // field order below must mirror decode_store()
+ if (is_symlink()) // the symlink target is only written for symlinks
+ ::encode(symlink, bl);
+ ::encode(dirfragtree, bl);
+ ::encode(xattrs, bl);
+ bufferlist snapbl; // snap blob goes into its own bufferlist, which is then nested in bl
+ encode_snap_blob(snapbl);
+ ::encode(snapbl, bl);
+ ::encode(old_inodes, bl);
+ if (inode.is_dir()) { // directories carry an optional default layout
+ ::encode((default_layout ? true : false), bl); // presence flag, read back by decode_store()
+ if (default_layout)
+ ::encode(*default_layout, bl);
+ }
+ ENCODE_FINISH(bl);
+}
+
+void CInode::decode_store(bufferlist::iterator& bl) { // inverse of encode_store(): fills this inode from bl
+ DECODE_START_LEGACY_COMPAT_LEN(3, 3, 3, bl); // NOTE(review): macro name suggests bare-struct_v buffers are still accepted - confirm in encoding.h
+ ::decode(inode, bl);
+ if (is_symlink()) // depends on the inode decoded on the previous line
+ ::decode(symlink, bl);
+ ::decode(dirfragtree, bl);
+ ::decode(xattrs, bl);
+ bufferlist snapbl; // nested bufferlist holding the snap blob
+ ::decode(snapbl, bl);
+ decode_snap_blob(snapbl);
+ ::decode(old_inodes, bl);
+ if (struct_v >= 2 && inode.is_dir()) { // layout section is only read for struct_v >= 2 directories
+ bool default_layout_exists;
+ ::decode(default_layout_exists, bl);
+ if (default_layout_exists) { // when the flag is false any existing default_layout is left untouched
+ delete default_layout; // drop the previous policy before allocating the replacement
+ default_layout = new file_layout_policy_t;
+ ::decode(*default_layout, bl);
+ }
+ }
+ DECODE_FINISH(bl);
+}
// ------------------
// locking
@@ -1401,7 +1443,7 @@ void CInode::decode_lock_state(int type, bufferlist& bl)
dir->fnode.rstat = rstat;
dir->fnode.accounted_rstat = accounted_rstat;
dir->dirty_old_rstat.swap(dirty_old_rstat);
- if (!(rstat == accounted_rstat) || dir->dirty_old_rstat.size()) {
+ if (!(rstat == accounted_rstat) || !dir->dirty_old_rstat.empty()) {
dout(10) << fg << " setting nestlock updated flag" << dendl;
nestlock.mark_dirty(); // ok bc we're auth and caller will handle
}
@@ -1445,7 +1487,7 @@ void CInode::decode_lock_state(int type, bufferlist& bl)
::decode(default_layout_exists, p);
if (default_layout_exists) {
delete default_layout;
- default_layout = new default_file_layout;
+ default_layout = new file_layout_policy_t;
decode(*default_layout, p);
}
}
@@ -2599,7 +2641,7 @@ int CInode::encode_inodestat(bufferlist& bl, Session *session,
SnapRealm *dir_realm,
snapid_t snapid, unsigned max_bytes)
{
- int client = session->inst.name.num();
+ int client = session->info.inst.name.num();
assert(snapid);
assert(session->connection);
@@ -2991,8 +3033,7 @@ void CInode::_decode_locks_rejoin(bufferlist::iterator& p, list<Context*>& waite
void CInode::encode_export(bufferlist& bl)
{
- __u8 struct_v = 2;
- ::encode(struct_v, bl);
+ ENCODE_START(3, 3, bl)
_encode_base(bl);
bool dirty = is_dirty();
@@ -3022,6 +3063,7 @@ void CInode::encode_export(bufferlist& bl)
_encode_locks_full(bl);
get(PIN_TEMPEXPORTING);
+ ENCODE_FINISH(bl);
}
void CInode::finish_export(utime_t now)
@@ -3039,8 +3081,7 @@ void CInode::finish_export(utime_t now)
void CInode::decode_import(bufferlist::iterator& p,
LogSegment *ls)
{
- __u8 struct_v;
- ::decode(struct_v, p);
+ DECODE_START_LEGACY_COMPAT_LEN(3, 3, 3, p);
_decode_base(p);
@@ -3102,4 +3143,5 @@ void CInode::decode_import(bufferlist::iterator& p,
}
_decode_locks_full(p);
+ DECODE_FINISH(p);
}
diff --git a/src/mds/CInode.h b/src/mds/CInode.h
index 8b18ce72f1e..32d27bcbe13 100644
--- a/src/mds/CInode.h
+++ b/src/mds/CInode.h
@@ -31,7 +31,7 @@
#include "ScatterLock.h"
#include "LocalLock.h"
#include "Capability.h"
-#include "snap.h"
+#include "SnapRealm.h"
#include <list>
#include <vector>
@@ -63,37 +63,6 @@ struct cinode_lock_info_t {
extern cinode_lock_info_t cinode_lock_info[];
extern int num_cinode_locks;
-/**
- * Default file layout stuff. This lets us set a default file layout on
- * a directory inode that all files in its tree will use on creation.
- */
-struct default_file_layout {
-
- ceph_file_layout layout;
-
- default_file_layout() {
- memset(&layout, 0, sizeof(layout));
- }
-
- void encode(bufferlist &bl) const {
- __u8 struct_v = 1;
- ::encode(struct_v, bl);
- ::encode(layout, bl);
- }
-
- void decode(bufferlist::iterator& bl) {
- __u8 struct_v;
- ::decode(struct_v, bl);
- if (struct_v != 1) { //uh-oh
- derr << "got default layout I don't understand!" << dendl;
- assert(0);
- }
- ::decode(layout, bl);
- }
-};
-WRITE_CLASS_ENCODER(default_file_layout);
-
-
// cached inode wrapper
class CInode : public MDSCacheObject {
/*
@@ -221,7 +190,7 @@ public:
return snaprealm || // other snaprealms will link to me
inode.is_dir() || // links to me in other snaps
inode.nlink > 1 || // there are remote links, possibly snapped, that will need to find me
- old_inodes.size(); // once multiversion, always multiversion. until old_inodes gets cleaned out.
+ !old_inodes.empty(); // once multiversion, always multiversion. until old_inodes gets cleaned out.
}
snapid_t get_oldest_snap();
@@ -242,7 +211,7 @@ public:
//bool hack_accessed;
//utime_t hack_load_stamp;
- default_file_layout *default_layout;
+ file_layout_policy_t *default_layout;
/**
* Projection methods, used to store inode changes until they have been journaled,
@@ -261,13 +230,13 @@ public:
inode_t *inode;
map<string,bufferptr> *xattrs;
sr_t *snapnode;
- default_file_layout *dir_layout;
+ file_layout_policy_t *dir_layout;
projected_inode_t() : inode(NULL), xattrs(NULL), snapnode(NULL), dir_layout(NULL) {}
projected_inode_t(inode_t *in, sr_t *sn) : inode(in), xattrs(NULL), snapnode(sn),
dir_layout(NULL) {}
projected_inode_t(inode_t *in, map<string, bufferptr> *xp = NULL, sr_t *sn = NULL,
- default_file_layout *dl = NULL) :
+ file_layout_policy_t *dl = NULL) :
inode(in), xattrs(xp), snapnode(sn), dir_layout(dl) {}
};
list<projected_inode_t*> projected_nodes; // projected values (only defined while dirty)
@@ -585,46 +554,8 @@ private:
void build_backtrace(inode_backtrace_t& bt);
unsigned encode_parent_mutation(ObjectOperation& m);
- void encode_store(bufferlist& bl) {
- __u8 struct_v = 2;
- ::encode(struct_v, bl);
- ::encode(inode, bl);
- if (is_symlink())
- ::encode(symlink, bl);
- ::encode(dirfragtree, bl);
- ::encode(xattrs, bl);
- bufferlist snapbl;
- encode_snap_blob(snapbl);
- ::encode(snapbl, bl);
- ::encode(old_inodes, bl);
- if (inode.is_dir()) {
- ::encode((default_layout ? true : false), bl);
- if (default_layout)
- ::encode(*default_layout, bl);
- }
- }
- void decode_store(bufferlist::iterator& bl) {
- __u8 struct_v;
- ::decode(struct_v, bl);
- ::decode(inode, bl);
- if (is_symlink())
- ::decode(symlink, bl);
- ::decode(dirfragtree, bl);
- ::decode(xattrs, bl);
- bufferlist snapbl;
- ::decode(snapbl, bl);
- decode_snap_blob(snapbl);
- ::decode(old_inodes, bl);
- if (struct_v >= 2 && inode.is_dir()) {
- bool default_layout_exists;
- ::decode(default_layout_exists, bl);
- if (default_layout_exists) {
- delete default_layout;
- default_layout = new default_file_layout;
- ::decode(*default_layout, bl);
- }
- }
- }
+ void encode_store(bufferlist& bl);
+ void decode_store(bufferlist::iterator& bl);
void encode_replica(int rep, bufferlist& bl) {
assert(is_auth());
@@ -656,7 +587,7 @@ private:
::decode(default_layout_exists, p);
if (default_layout_exists) {
delete default_layout;
- default_layout = new default_file_layout;
+ default_layout = new file_layout_policy_t;
::decode(*default_layout, p);
}
}
diff --git a/src/mds/Capability.cc b/src/mds/Capability.cc
new file mode 100644
index 00000000000..f1394308d9b
--- /dev/null
+++ b/src/mds/Capability.cc
@@ -0,0 +1,172 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include "Capability.h"
+
+#include "common/Formatter.h"
+
+
+/*
+ * Capability::Export
+ */
+
+void Capability::Export::encode(bufferlist &bl) const // serialize an exported cap into bl
+{
+ ENCODE_START(2, 2, bl); // versioned header (args 2, 2); the old inline encoder wrote a bare struct_v = 1
+ ::encode(wanted, bl); // field order must mirror Export::decode()
+ ::encode(issued, bl);
+ ::encode(pending, bl);
+ ::encode(client_follows, bl);
+ ::encode(mseq, bl);
+ ::encode(last_issue_stamp, bl);
+ ENCODE_FINISH(bl);
+}
+
+void Capability::Export::decode(bufferlist::iterator &p) // inverse of Export::encode()
+{
+ DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, p); // NOTE(review): presumably still accepts the old bare-struct_v layout - confirm in encoding.h
+ ::decode(wanted, p); // same field order as Export::encode()
+ ::decode(issued, p);
+ ::decode(pending, p);
+ ::decode(client_follows, p);
+ ::decode(mseq, p);
+ ::decode(last_issue_stamp, p);
+ DECODE_FINISH(p);
+}
+
+void Capability::Export::dump(Formatter *f) const // write every field to f under a fixed key
+{
+ f->dump_unsigned("wanted", wanted);
+ f->dump_unsigned("issued", issued);
+ f->dump_unsigned("pending", pending);
+ f->dump_unsigned("client_follows", client_follows);
+ f->dump_unsigned("migrate_seq", mseq); // key name differs from the member name (mseq)
+ f->dump_stream("last_issue_stamp") << last_issue_stamp; // streamed, not dumped as an integer
+}
+
+void Capability::Export::generate_test_instances(list<Capability::Export*>& ls)
+{
+  // Sample 1: default-constructed.
+  ls.push_back(new Export);
+
+  // Sample 2: all six fields set to distinct values.  Constructor
+  // arguments are (wanted, issued, pending, client_follows, mseq,
+  // last_issue_stamp).
+  Export *populated = new Export(1, 2, 3, 4, 5, utime_t(6, 7));
+  ls.push_back(populated);
+}
+
+
+/*
+ * Capability::revoke_info
+ */
+
+void Capability::revoke_info::encode(bufferlist& bl) const // serialize one revoke record into bl
+{
+ ENCODE_START(2, 2, bl) // versioned header (args 2, 2); the old inline encoder wrote a bare struct_v = 1
+ ::encode(before, bl); // field order must mirror revoke_info::decode()
+ ::encode(seq, bl);
+ ::encode(last_issue, bl);
+ ENCODE_FINISH(bl);
+}
+
+void Capability::revoke_info::decode(bufferlist::iterator& bl) // inverse of revoke_info::encode()
+{
+ DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl); // NOTE(review): presumably still accepts the old bare-struct_v layout - confirm in encoding.h
+ ::decode(before, bl); // same field order as revoke_info::encode()
+ ::decode(seq, bl);
+ ::decode(last_issue, bl);
+ DECODE_FINISH(bl);
+}
+
+void Capability::revoke_info::dump(Formatter *f) const // write the three fields to f as unsigned values
+{
+ f->dump_unsigned("before", before);
+ f->dump_unsigned("seq", seq);
+ f->dump_unsigned("last_issue", last_issue);
+}
+
+void Capability::revoke_info::generate_test_instances(list<Capability::revoke_info*>& ls)
+{
+  // Sample 1: default-constructed.
+  ls.push_back(new revoke_info);
+  // Sample 2: built with (before, seq, last_issue) = (1, 2, 3).
+  revoke_info *populated = new revoke_info(1, 2, 3);
+  ls.push_back(populated);
+}
+
+
+/*
+ * Capability
+ */
+
+void Capability::encode(bufferlist& bl) const // serialize this capability into bl
+{
+ ENCODE_START(2, 2, bl) // versioned header (args 2, 2); the old inline encoder wrote a bare struct_v = 1
+ ::encode(last_sent, bl); // field order must mirror Capability::decode()
+ ::encode(last_issue_stamp, bl);
+
+ ::encode(_wanted, bl);
+ ::encode(_pending, bl);
+ ::encode(_revokes, bl); // _issued is not written here
+ ENCODE_FINISH(bl);
+}
+
+void Capability::decode(bufferlist::iterator &bl) // inverse of Capability::encode()
+{
+ DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl) // NOTE(review): presumably still accepts the old bare-struct_v layout - confirm in encoding.h
+ ::decode(last_sent, bl); // same field order as Capability::encode()
+ ::decode(last_issue_stamp, bl);
+
+ ::decode(_wanted, bl);
+ ::decode(_pending, bl);
+ ::decode(_revokes, bl);
+ DECODE_FINISH(bl);
+
+ _calc_issued(); // runs after all fields are loaded; presumably rebuilds _issued, which is not encoded - confirm
+}
+
+void Capability::dump(Formatter *f) const  // emit this capability's encoded fields through f
+{
+  f->dump_unsigned("last_sent", last_sent);
+  // last_issue_stamp is a utime_t, so stream it exactly as Export::dump() does
+  f->dump_stream("last_issue_stamp") << last_issue_stamp;
+  f->dump_unsigned("wanted", _wanted);
+  f->dump_unsigned("pending", _pending);
+  f->open_array_section("revokes");  // one "revoke" object per _revokes entry
+  for (list<revoke_info>::const_iterator p = _revokes.begin(); p != _revokes.end(); ++p) {
+    f->open_object_section("revoke");
+    p->dump(f);
+    f->close_section();
+  }
+  f->close_section();
+}
+
+void Capability::generate_test_instances(list<Capability*>& ls)
+{
+  // Sample 1: a Capability built purely from the constructor defaults.
+  ls.push_back(new Capability);
+
+  // Sample 2: every encoded field carries a distinct value, including
+  // two queued revoke_info entries.
+  Capability *populated = new Capability;
+  populated->last_sent = 11;
+  populated->last_issue_stamp = utime_t(12, 13);
+  populated->_wanted = 14;
+  populated->_pending = 15;
+  // revoke_info arguments are (before, seq, last_issue)
+  populated->_revokes.push_back(revoke_info(16, 17, 18));
+  populated->_revokes.push_back(revoke_info(19, 20, 21));
+  ls.push_back(populated);
+}
diff --git a/src/mds/Capability.h b/src/mds/Capability.h
index 6fe67f45b1d..946afdc02b9 100644
--- a/src/mds/Capability.h
+++ b/src/mds/Capability.h
@@ -21,6 +21,8 @@
#include "common/config.h"
+#include "mdstypes.h"
+
/*
Capability protocol notes.
@@ -57,6 +59,10 @@
class CInode;
+namespace ceph {
+ class Formatter;
+}
+
class Capability {
private:
static boost::pool<> pool;
@@ -81,26 +87,10 @@ public:
Export() {}
Export(int w, int i, int p, snapid_t cf, ceph_seq_t s, utime_t lis) :
wanted(w), issued(i), pending(p), client_follows(cf), mseq(s), last_issue_stamp(lis) {}
- void encode(bufferlist &bl) const {
- __u8 struct_v = 1;
- ::encode(struct_v, bl);
- ::encode(wanted, bl);
- ::encode(issued, bl);
- ::encode(pending, bl);
- ::encode(client_follows, bl);
- ::encode(mseq, bl);
- ::encode(last_issue_stamp, bl);
- }
- void decode(bufferlist::iterator &p) {
- __u8 struct_v;
- ::decode(struct_v, p);
- ::decode(wanted, p);
- ::decode(issued, p);
- ::decode(pending, p);
- ::decode(client_follows, p);
- ::decode(mseq, p);
- ::decode(last_issue_stamp, p);
- }
+ void encode(bufferlist &bl) const;
+ void decode(bufferlist::iterator &p);
+ void dump(Formatter *f) const;
+ static void generate_test_instances(list<Export*>& ls);
};
private:
@@ -123,20 +113,10 @@ public:
ceph_seq_t seq, last_issue;
revoke_info() {}
revoke_info(__u32 b, ceph_seq_t s, ceph_seq_t li) : before(b), seq(s), last_issue(li) {}
- void encode(bufferlist& bl) const {
- __u8 struct_v = 1;
- ::encode(struct_v, bl);
- ::encode(before, bl);
- ::encode(seq, bl);
- ::encode(last_issue, bl);
- }
- void decode(bufferlist::iterator& bl) {
- __u8 struct_v;
- ::decode(struct_v, bl);
- ::decode(before, bl);
- ::decode(seq, bl);
- ::decode(last_issue, bl);
- }
+ void encode(bufferlist& bl) const;
+ void decode(bufferlist::iterator& bl);
+ void dump(Formatter *f) const;
+ static void generate_test_instances(list<revoke_info*>& ls);
};
private:
__u32 _pending, _issued;
@@ -231,7 +211,7 @@ public:
xlist<Capability*>::item item_session_caps;
xlist<Capability*>::item item_snaprealm_caps;
- Capability(CInode *i, uint64_t id, client_t c) :
+ Capability(CInode *i = NULL, uint64_t id = 0, client_t c = 0) :
inode(i), client(c),
cap_id(id),
_wanted(0),
@@ -326,28 +306,10 @@ public:
}
// serializers
- void encode(bufferlist &bl) const {
- __u8 struct_v = 1;
- ::encode(struct_v, bl);
- ::encode(last_sent, bl);
- ::encode(last_issue_stamp, bl);
-
- ::encode(_wanted, bl);
- ::encode(_pending, bl);
- ::encode(_revokes, bl);
- }
- void decode(bufferlist::iterator &bl) {
- __u8 struct_v;
- ::decode(struct_v, bl);
- ::decode(last_sent, bl);
- ::decode(last_issue_stamp, bl);
-
- ::decode(_wanted, bl);
- ::decode(_pending, bl);
- ::decode(_revokes, bl);
-
- _calc_issued();
- }
+ void encode(bufferlist &bl) const;
+ void decode(bufferlist::iterator &bl);
+ void dump(Formatter *f) const;
+ static void generate_test_instances(list<Capability*>& ls);
};
diff --git a/src/mds/InoTable.h b/src/mds/InoTable.h
index 3a706360797..88fd9ecaaaa 100644
--- a/src/mds/InoTable.h
+++ b/src/mds/InoTable.h
@@ -44,15 +44,15 @@ class InoTable : public MDSTable {
void reset_state();
void encode_state(bufferlist& bl) {
- __u8 v = 1;
- ::encode(v, bl);
+ ENCODE_START(2, 2, bl);
::encode(free, bl);
+ ENCODE_FINISH(bl);
}
void decode_state(bufferlist::iterator& bl) {
- __u8 v;
- ::decode(v, bl);
+ DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl);
::decode(free, bl);
projected_free = free;
+ DECODE_FINISH(bl);
}
void skip_inos(inodeno_t i);
diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc
index bce314284db..da6661889ef 100644
--- a/src/mds/Locker.cc
+++ b/src/mds/Locker.cc
@@ -26,7 +26,6 @@
#include "include/filepath.h"
-#include "events/EString.h"
#include "events/EUpdate.h"
#include "events/EOpen.h"
@@ -1576,7 +1575,7 @@ void Locker::file_update_finish(CInode *in, Mutation *mut, bool share, client_t
mut->cleanup();
delete mut;
- if (!in->is_head() && in->client_snap_caps.size()) {
+ if (!in->is_head() && !in->client_snap_caps.empty()) {
dout(10) << " client_snap_caps " << in->client_snap_caps << dendl;
// check for snap writeback completion
bool gather = false;
@@ -1628,8 +1627,8 @@ Capability* Locker::issue_new_caps(CInode *in,
}
// my needs
- assert(session->inst.name.is_client());
- int my_client = session->inst.name.num();
+ assert(session->info.inst.name.is_client());
+ int my_client = session->info.inst.name.num();
int my_want = ceph_caps_for_mode(mode);
// register a capability
@@ -1811,7 +1810,7 @@ void Locker::issue_truncate(CInode *in)
void Locker::revoke_stale_caps(Session *session)
{
- dout(10) << "revoke_stale_caps for " << session->inst.name << dendl;
+ dout(10) << "revoke_stale_caps for " << session->info.inst.name << dendl;
client_t client = session->get_client();
for (xlist<Capability*>::iterator p = session->caps.begin(); !p.end(); ++p) {
@@ -1845,7 +1844,7 @@ void Locker::revoke_stale_caps(Session *session)
void Locker::resume_stale_caps(Session *session)
{
- dout(10) << "resume_stale_caps for " << session->inst.name << dendl;
+ dout(10) << "resume_stale_caps for " << session->info.inst.name << dendl;
for (xlist<Capability*>::iterator p = session->caps.begin(); !p.end(); ++p) {
Capability *cap = *p;
@@ -1862,7 +1861,7 @@ void Locker::resume_stale_caps(Session *session)
void Locker::remove_stale_leases(Session *session)
{
- dout(10) << "remove_stale_leases for " << session->inst.name << dendl;
+ dout(10) << "remove_stale_leases for " << session->info.inst.name << dendl;
xlist<ClientLease*>::iterator p = session->leases.begin();
while (!p.end()) {
ClientLease *l = *p;
@@ -2358,7 +2357,7 @@ void Locker::handle_client_caps(MClientCaps *m)
// We can infer that the client WONT send a FLUSHSNAP once they have
// released all WR/EXCL caps (the FLUSHSNAP always comes before the cap
// update/release).
- if (head_in->client_need_snapflush.size()) {
+ if (!head_in->client_need_snapflush.empty()) {
if ((cap->issued() & CEPH_CAP_ANY_FILE_WR) == 0) {
_do_null_snapflush(head_in, client, follows);
} else {
diff --git a/src/mds/LogEvent.cc b/src/mds/LogEvent.cc
index 0c3b965f86a..c4f18c756a9 100644
--- a/src/mds/LogEvent.cc
+++ b/src/mds/LogEvent.cc
@@ -18,8 +18,6 @@
#include "MDS.h"
// events i know of
-#include "events/EString.h"
-
#include "events/ESubtreeMap.h"
#include "events/EExport.h"
#include "events/EImportStart.h"
@@ -44,16 +42,28 @@ LogEvent *LogEvent::decode(bufferlist& bl)
// parse type, length
bufferlist::iterator p = bl.begin();
__u32 type;
+ LogEvent *event = NULL;
::decode(type, p);
+ if (EVENT_NEW_ENCODING == type) {
+ DECODE_START(1, p);
+ ::decode(type, p);
+ event = decode_event(bl, p, type);
+ DECODE_FINISH(p);
+ } else { // we are using classic encoding
+ event = decode_event(bl, p, type);
+ }
+ return event;
+}
+
+LogEvent *LogEvent::decode_event(bufferlist& bl, bufferlist::iterator& p, __u32 type)
+{
int length = bl.length() - p.get_off();
generic_dout(15) << "decode_log_event type " << type << ", size " << length << dendl;
// create event
LogEvent *le;
switch (type) {
- case EVENT_STRING: le = new EString; break;
-
case EVENT_SUBTREEMAP: le = new ESubtreeMap; break;
case EVENT_SUBTREEMAP_TEST:
le = new ESubtreeMap;
@@ -67,6 +77,7 @@ LogEvent *LogEvent::decode(bufferlist& bl)
case EVENT_RESETJOURNAL: le = new EResetJournal; break;
case EVENT_SESSION: le = new ESession; break;
+ case EVENT_SESSIONS_OLD: le = new ESessions; ((ESessions *)le)->mark_old_encoding(); break;
case EVENT_SESSIONS: le = new ESessions; break;
case EVENT_UPDATE: le = new EUpdate; break;
diff --git a/src/mds/LogEvent.h b/src/mds/LogEvent.h
index e0b4bea4dc6..fdf145c85ea 100644
--- a/src/mds/LogEvent.h
+++ b/src/mds/LogEvent.h
@@ -15,7 +15,8 @@
#ifndef CEPH_LOGEVENT_H
#define CEPH_LOGEVENT_H
-#define EVENT_STRING 1
+#define EVENT_NEW_ENCODING 0 // indicates that the encoding is versioned
+#define EVENT_UNUSED 1 // was previously EVENT_STRING
#define EVENT_SUBTREEMAP 2
#define EVENT_EXPORT 3
@@ -26,7 +27,8 @@
#define EVENT_RESETJOURNAL 9
#define EVENT_SESSION 10
-#define EVENT_SESSIONS 11
+#define EVENT_SESSIONS_OLD 11
+#define EVENT_SESSIONS 12
#define EVENT_UPDATE 20
#define EVENT_SLAVEUPDATE 21
@@ -54,6 +56,7 @@ class LogEvent {
private:
__u32 _type;
uint64_t _start_off;
+ static LogEvent *decode_event(bufferlist& bl, bufferlist::iterator& p, __u32 type);
protected:
utime_t stamp;
@@ -82,11 +85,14 @@ protected:
static LogEvent *decode(bufferlist &bl);
void encode_with_header(bufferlist& bl) {
+ ::encode(EVENT_NEW_ENCODING, bl);
+ ENCODE_START(1, 1, bl)
::encode(_type, bl);
encode(bl);
+ ENCODE_FINISH(bl);
}
- virtual void print(ostream& out) {
+ virtual void print(ostream& out) const {
out << "event(" << _type << ")";
}
diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc
index 7d04563c78a..8762400ab55 100644
--- a/src/mds/MDCache.cc
+++ b/src/mds/MDCache.cc
@@ -51,7 +51,6 @@
#include "events/ESubtreeMap.h"
#include "events/EUpdate.h"
#include "events/ESlaveUpdate.h"
-#include "events/EString.h"
#include "events/EImportFinish.h"
#include "events/EFragment.h"
#include "events/ECommitted.h"
@@ -312,7 +311,7 @@ CInode *MDCache::create_system_inode(inodeno_t ino, int mode)
CInode *MDCache::create_root_inode()
{
CInode *i = create_system_inode(MDS_INO_ROOT, S_IFDIR|0755);
- i->default_layout = new struct default_file_layout;
+ i->default_layout = new struct file_layout_policy_t;
i->default_layout->layout = default_file_layout;
i->default_layout->layout.fl_pg_pool = mds->mdsmap->get_first_data_pool();
return i;
@@ -4942,7 +4941,7 @@ void MDCache::rejoin_import_cap(CInode *in, client_t client, ceph_mds_cap_reconn
void MDCache::try_reconnect_cap(CInode *in, Session *session)
{
- client_t client = session->get_client();
+ client_t client = session->info.get_client();
ceph_mds_cap_reconnect *rc = get_replay_cap_reconnect(in->ino(), client);
if (rc) {
in->reconnect_cap(client, *rc, session);
@@ -4968,10 +4967,10 @@ void MDCache::try_reconnect_cap(CInode *in, Session *session)
void MDCache::do_cap_import(Session *session, CInode *in, Capability *cap)
{
- client_t client = session->inst.name.num();
+ client_t client = session->info.inst.name.num();
SnapRealm *realm = in->find_snaprealm();
if (realm->have_past_parents_open()) {
- dout(10) << "do_cap_import " << session->inst.name << " mseq " << cap->get_mseq() << " on " << *in << dendl;
+ dout(10) << "do_cap_import " << session->info.inst.name << " mseq " << cap->get_mseq() << " on " << *in << dendl;
cap->set_last_issue();
MClientCaps *reap = new MClientCaps(CEPH_CAP_OP_IMPORT,
in->ino(),
@@ -4983,7 +4982,7 @@ void MDCache::do_cap_import(Session *session, CInode *in, Capability *cap)
realm->build_snap_trace(reap->snapbl);
mds->send_message_client_counted(reap, session);
} else {
- dout(10) << "do_cap_import missing past snap parents, delaying " << session->inst.name << " mseq "
+ dout(10) << "do_cap_import missing past snap parents, delaying " << session->info.inst.name << " mseq "
<< cap->get_mseq() << " on " << *in << dendl;
in->auth_pin(this);
cap->inc_suppress();
@@ -5301,7 +5300,7 @@ void MDCache::queue_file_recover(CInode *in)
predirty_journal_parents(mut, &le->metablob, in, 0, PREDIRTY_PRIMARY);
s.erase(*s.begin());
- while (s.size()) {
+ while (!s.empty()) {
snapid_t snapid = *s.begin();
CInode *cow_inode = 0;
journal_cow_inode(mut, &le->metablob, in, snapid-1, &cow_inode);
@@ -7998,7 +7997,7 @@ void MDCache::anchor_create(MDRequest *mdr, CInode *in, Context *onfinish)
// make trace
vector<Anchor> trace;
in->make_anchor_trace(trace);
- if (!trace.size()) {
+ if (trace.empty()) {
assert(MDS_INO_IS_BASE(in->ino()));
trace.push_back(Anchor(in->ino(), in->ino(), 0, 0, 0));
}
diff --git a/src/mds/MDS.cc b/src/mds/MDS.cc
index e59f5955916..387201cf9ba 100644
--- a/src/mds/MDS.cc
+++ b/src/mds/MDS.cc
@@ -408,7 +408,7 @@ void MDS::send_message_client_counted(Message *m, Connection *connection)
void MDS::send_message_client_counted(Message *m, Session *session)
{
version_t seq = session->inc_push_seq();
- dout(10) << "send_message_client_counted " << session->inst.name << " seq "
+ dout(10) << "send_message_client_counted " << session->info.inst.name << " seq "
<< seq << " " << *m << dendl;
if (session->connection) {
messenger->send_message(m, session->connection);
@@ -419,7 +419,7 @@ void MDS::send_message_client_counted(Message *m, Session *session)
void MDS::send_message_client(Message *m, Session *session)
{
- dout(10) << "send_message_client " << session->inst << " " << *m << dendl;
+ dout(10) << "send_message_client " << session->info.inst << " " << *m << dendl;
if (session->connection) {
messenger->send_message(m, session->connection);
} else {
@@ -906,6 +906,20 @@ void MDS::handle_mds_map(MMDSMap *m)
if (want_state == MDSMap::STATE_BOOT) {
dout(10) << "not in map yet" << dendl;
} else {
+ // did i get kicked by someone else?
+ if (g_conf->mds_enforce_unique_name) {
+ if (uint64_t existing = mdsmap->find_mds_gid_by_name(name)) {
+ MDSMap::mds_info_t& i = mdsmap->get_info_gid(existing);
+ if (i.global_id > monc->get_global_id()) {
+ dout(1) << "handle_mds_map i (" << addr
+ << ") dne in the mdsmap, new instance has larger gid " << i.global_id
+ << ", suicide" << dendl;
+ suicide();
+ goto out;
+ }
+ }
+ }
+
dout(1) << "handle_mds_map i (" << addr
<< ") dne in the mdsmap, respawning myself" << dendl;
respawn();
@@ -1834,7 +1848,7 @@ bool MDS::_dispatch(Message *m)
}
// finish any triggered contexts
- while (finished_queue.size()) {
+ while (!finished_queue.empty()) {
dout(7) << "mds has " << finished_queue.size() << " queued contexts" << dendl;
dout(10) << finished_queue << dendl;
list<Context*> ls;
@@ -2070,14 +2084,14 @@ bool MDS::ms_verify_authorizer(Connection *con, int peer_type,
Session *s = sessionmap.get_session(n);
if (!s) {
s = new Session;
- s->inst.addr = con->get_peer_addr();
- s->inst.name = n;
- dout(10) << " new session " << s << " for " << s->inst << " con " << con << dendl;
+ s->info.inst.addr = con->get_peer_addr();
+ s->info.inst.name = n;
+ dout(10) << " new session " << s << " for " << s->info.inst << " con " << con << dendl;
con->set_priv(s);
s->connection = con;
sessionmap.add_session(s);
} else {
- dout(10) << " existing session " << s << " for " << s->inst << " existing con " << s->connection
+ dout(10) << " existing session " << s << " for " << s->info.inst << " existing con " << s->connection
<< ", new/authorizing con " << con << dendl;
con->set_priv(s->get());
diff --git a/src/mds/MDSMap.cc b/src/mds/MDSMap.cc
index 2118d5e1de9..010ed286ea3 100644
--- a/src/mds/MDSMap.cc
+++ b/src/mds/MDSMap.cc
@@ -28,6 +28,7 @@ CompatSet get_mdsmap_compat_set() {
feature_incompat.insert(MDS_FEATURE_INCOMPAT_CLIENTRANGES);
feature_incompat.insert(MDS_FEATURE_INCOMPAT_FILELAYOUT);
feature_incompat.insert(MDS_FEATURE_INCOMPAT_DIRINODE);
+ feature_incompat.insert(MDS_FEATURE_INCOMPAT_ENCODING);
return CompatSet(feature_compat, feature_ro_compat, feature_incompat);
}
@@ -64,6 +65,17 @@ void MDSMap::mds_info_t::dump(Formatter *f) const
f->close_section();
}
+void MDSMap::mds_info_t::generate_test_instances(list<mds_info_t*>& ls)
+{
+  // Sample 1: default-constructed.
+  ls.push_back(new mds_info_t());
+  // Sample 2: default-constructed, then given an id, a name and a rank.
+  ls.push_back(new mds_info_t());
+  ls.back()->global_id = 1;
+  ls.back()->name = "test_instance";
+  ls.back()->rank = 0;
+}
+
void MDSMap::dump(Formatter *f) const
{
f->dump_int("epoch", epoch);
@@ -116,6 +128,22 @@ void MDSMap::dump(Formatter *f) const
f->dump_int("metadata_pool", metadata_pool);
}
+void MDSMap::generate_test_instances(list<MDSMap*>& ls)
+{
+  // Append a single sample map to ls, then fill it in through the list.
+  ls.push_back(new MDSMap());
+  MDSMap *sample = ls.back();
+  sample->max_mds = 1;
+  sample->data_pools.insert(0);
+  sample->metadata_pool = 1;
+  sample->cas_pool = 2;
+  sample->compat = get_mdsmap_compat_set();
+  // Deliberately non-default values, so they cannot be mistaken for defaults.
+  sample->session_timeout = 61;
+  sample->session_autoclose = 301;
+  sample->max_file_size = 1 << 24;
+}
+
void MDSMap::print(ostream& out)
{
out << "epoch\t" << epoch << "\n";
@@ -166,7 +194,7 @@ void MDSMap::print(ostream& out)
out << " '" << info.standby_for_name << "'";
out << ")";
}
- if (info.export_targets.size())
+ if (!info.export_targets.empty())
out << " export_targets=" << info.export_targets;
out << "\n";
}
@@ -194,13 +222,13 @@ void MDSMap::print_summary(ostream& out)
out << "e" << get_epoch() << ": " << up.size() << "/" << in.size() << "/" << max_mds << " up";
- if (by_rank.size())
+ if (!by_rank.empty())
out << " " << by_rank;
for (map<string,int>::reverse_iterator p = by_state.rbegin(); p != by_state.rend(); p++)
out << ", " << p->second << " " << p->first;
- if (failed.size())
+ if (!failed.empty())
out << ", " << failed.size() << " failed";
//if (stopped.size())
//out << ", " << stopped.size() << " stopped";
@@ -243,7 +271,7 @@ void MDSMap::get_health(list<pair<health_status_t,string> >& summary,
}
}
}
- if (laggy.size()) {
+ if (!laggy.empty()) {
std::ostringstream oss;
oss << "mds " << laggy
<< ((laggy.size() > 1) ? " are":" is")
@@ -251,3 +279,210 @@ void MDSMap::get_health(list<pair<health_status_t,string> >& summary,
summary.push_back(make_pair(HEALTH_WARN, oss.str()));
}
}
+
+void MDSMap::mds_info_t::encode_versioned(bufferlist& bl, uint64_t features) const // ENCODE_START-framed form; features is not read in this body
+{
+ ENCODE_START(4, 4, bl); // version args (4, 4), one above the 3 written by encode_unversioned()
+ ::encode(global_id, bl); // same fields, same order as encode_unversioned()
+ ::encode(name, bl);
+ ::encode(rank, bl);
+ ::encode(inc, bl);
+ ::encode(state, bl);
+ ::encode(state_seq, bl);
+ ::encode(addr, bl);
+ ::encode(laggy_since, bl);
+ ::encode(standby_for_rank, bl);
+ ::encode(standby_for_name, bl);
+ ::encode(export_targets, bl);
+ ENCODE_FINISH(bl);
+}
+
+void MDSMap::mds_info_t::encode_unversioned(bufferlist& bl) const // form used by encode() when the features lack CEPH_FEATURE_MDSENC
+{
+ __u8 struct_v = 3; // bare one-byte version, same value the removed inline encode() wrote
+ ::encode(struct_v, bl);
+ ::encode(global_id, bl); // same fields, same order as encode_versioned()
+ ::encode(name, bl);
+ ::encode(rank, bl);
+ ::encode(inc, bl);
+ ::encode(state, bl);
+ ::encode(state_seq, bl);
+ ::encode(addr, bl);
+ ::encode(laggy_since, bl);
+ ::encode(standby_for_rank, bl);
+ ::encode(standby_for_name, bl);
+ ::encode(export_targets, bl);
+}
+
+void MDSMap::mds_info_t::decode(bufferlist::iterator& bl) // single decoder for both encode_versioned() and encode_unversioned() output
+{
+ DECODE_START_LEGACY_COMPAT_LEN(4, 4, 4, bl); // NOTE(review): presumably treats struct_v < 4 as the bare-version layout - confirm in encoding.h
+ ::decode(global_id, bl);
+ ::decode(name, bl);
+ ::decode(rank, bl);
+ ::decode(inc, bl);
+ ::decode(state, bl);
+ ::decode(state_seq, bl);
+ ::decode(addr, bl);
+ ::decode(laggy_since, bl);
+ ::decode(standby_for_rank, bl);
+ ::decode(standby_for_name, bl);
+ if (struct_v >= 2) // export_targets is only read for struct_v >= 2
+ ::decode(export_targets, bl);
+ DECODE_FINISH(bl);
+}
+
+
+
+void MDSMap::encode(bufferlist& bl, uint64_t features) const // picks one of three wire formats from the feature bits
+{
+ if ((features & CEPH_FEATURE_PGID64) == 0) { // format v2: PGID64 not set
+ __u16 v = 2;
+ ::encode(v, bl);
+ ::encode(epoch, bl);
+ ::encode(flags, bl);
+ ::encode(last_failure, bl);
+ ::encode(root, bl);
+ ::encode(session_timeout, bl);
+ ::encode(session_autoclose, bl);
+ ::encode(max_file_size, bl);
+ ::encode(max_mds, bl);
+ __u32 n = mds_info.size(); // mds_info written by hand: count, then key/value pairs
+ ::encode(n, bl);
+ for (map<uint64_t, mds_info_t>::const_iterator i = mds_info.begin();
+ i != mds_info.end(); ++i) {
+ ::encode(i->first, bl);
+ ::encode(i->second, bl, features);
+ }
+ n = data_pools.size(); // data_pools written by hand as well
+ ::encode(n, bl);
+ for (set<int64_t>::const_iterator p = data_pools.begin(); p != data_pools.end(); ++p) {
+ n = *p; // each int64_t pool id is narrowed to __u32 in this format
+ ::encode(n, bl);
+ }
+
+ int32_t m = cas_pool; // cas_pool is written as int32_t in this format
+ ::encode(m, bl);
+ return; // NOTE(review): returns before the extended section, yet MDSMap::decode() reads ev whenever struct_v >= 2 - confirm v2 consumers never hit that path
+ } else if ((features & CEPH_FEATURE_MDSENC) == 0) { // format v3: PGID64 set, MDSENC not set
+ __u16 v = 3;
+ ::encode(v, bl);
+ ::encode(epoch, bl);
+ ::encode(flags, bl);
+ ::encode(last_failure, bl);
+ ::encode(root, bl);
+ ::encode(session_timeout, bl);
+ ::encode(session_autoclose, bl);
+ ::encode(max_file_size, bl);
+ ::encode(max_mds, bl);
+ __u32 n = mds_info.size(); // mds_info still written by hand so features reaches each mds_info_t
+ ::encode(n, bl);
+ for (map<uint64_t, mds_info_t>::const_iterator i = mds_info.begin();
+ i != mds_info.end(); ++i) {
+ ::encode(i->first, bl);
+ ::encode(i->second, bl, features);
+ }
+ ::encode(data_pools, bl);
+ ::encode(cas_pool, bl);
+
+ // kclient ignores everything from here
+ __u16 ev = 5; // version of the extended section that follows
+ ::encode(ev, bl);
+ ::encode(compat, bl);
+ ::encode(metadata_pool, bl);
+ ::encode(created, bl);
+ ::encode(modified, bl);
+ ::encode(tableserver, bl);
+ ::encode(in, bl);
+ ::encode(inc, bl);
+ ::encode(up, bl);
+ ::encode(failed, bl);
+ ::encode(stopped, bl);
+ ::encode(last_failure_osd_epoch, bl);
+ } else {// have MDS encoding feature!
+ ENCODE_START(4, 4, bl); // format v4: same field sequence as v3, framed by ENCODE_START/ENCODE_FINISH
+ ::encode(epoch, bl);
+ ::encode(flags, bl);
+ ::encode(last_failure, bl);
+ ::encode(root, bl);
+ ::encode(session_timeout, bl);
+ ::encode(session_autoclose, bl);
+ ::encode(max_file_size, bl);
+ ::encode(max_mds, bl);
+ ::encode(mds_info, bl, features); // whole map in one call, features passed along
+ ::encode(data_pools, bl);
+ ::encode(cas_pool, bl);
+
+ // kclient ignores everything from here
+ __u16 ev = 5; // version of the extended section that follows
+ ::encode(ev, bl);
+ ::encode(compat, bl);
+ ::encode(metadata_pool, bl);
+ ::encode(created, bl);
+ ::encode(modified, bl);
+ ::encode(tableserver, bl);
+ ::encode(in, bl);
+ ::encode(inc, bl);
+ ::encode(up, bl);
+ ::encode(failed, bl);
+ ::encode(stopped, bl);
+ ::encode(last_failure_osd_epoch, bl);
+ ENCODE_FINISH(bl);
+ }
+}
+
+void MDSMap::decode(bufferlist::iterator& p) // decode counterpart of MDSMap::encode(); struct_v and ev select the legacy paths below
+{
+ DECODE_START_LEGACY_COMPAT_LEN_16(4, 4, 4, p); // NOTE(review): the _16 suffix presumably matches the __u16 version written by the v2/v3 encoders - confirm in encoding.h
+ ::decode(epoch, p);
+ ::decode(flags, p);
+ ::decode(last_failure, p);
+ ::decode(root, p);
+ ::decode(session_timeout, p);
+ ::decode(session_autoclose, p);
+ ::decode(max_file_size, p);
+ ::decode(max_mds, p);
+ ::decode(mds_info, p);
+ if (struct_v < 3) { // before v3: pools are a hand-written count plus 32-bit ids
+ __u32 n;
+ ::decode(n, p);
+ while (n--) {
+ __u32 m;
+ ::decode(m, p);
+ data_pools.insert(m);
+ }
+ __s32 s; // cas_pool was stored as a signed 32-bit value
+ ::decode(s, p);
+ cas_pool = s;
+ } else {
+ ::decode(data_pools, p);
+ ::decode(cas_pool, p);
+ }
+
+ // kclient ignores everything from here
+ __u16 ev = 1; // extended-section version; stays 1 when struct_v < 2
+ if (struct_v >= 2)
+ ::decode(ev, p);
+ if (ev >= 3) // compat set is only present from ev 3 on
+ ::decode(compat, p);
+ else
+ compat = get_mdsmap_compat_set_base();
+ if (ev < 5) { // before ev 5 metadata_pool was stored as __u32
+ __u32 n;
+ ::decode(n, p);
+ metadata_pool = n;
+ } else {
+ ::decode(metadata_pool, p);
+ }
+ ::decode(created, p);
+ ::decode(modified, p);
+ ::decode(tableserver, p);
+ ::decode(in, p);
+ ::decode(inc, p);
+ ::decode(up, p);
+ ::decode(failed, p);
+ ::decode(stopped, p);
+ if (ev >= 4) // last_failure_osd_epoch is only present from ev 4 on
+ ::decode(last_failure_osd_epoch, p);
+ DECODE_FINISH(p);
+}
diff --git a/src/mds/MDSMap.h b/src/mds/MDSMap.h
index 47c2c52d23d..64f10afd6f4 100644
--- a/src/mds/MDSMap.h
+++ b/src/mds/MDSMap.h
@@ -30,6 +30,7 @@ using namespace std;
#include "common/config.h"
#include "include/CompatSet.h"
+#include "include/ceph_features.h"
#include "common/Formatter.h"
/*
@@ -65,6 +66,7 @@ extern CompatSet get_mdsmap_compat_set_base(); // pre v0.20
#define MDS_FEATURE_INCOMPAT_CLIENTRANGES CompatSet::Feature(2, "client writeable ranges")
#define MDS_FEATURE_INCOMPAT_FILELAYOUT CompatSet::Feature(3, "default file layouts on dirs")
#define MDS_FEATURE_INCOMPAT_DIRINODE CompatSet::Feature(4, "dir inode in separate object")
+#define MDS_FEATURE_INCOMPAT_ENCODING CompatSet::Feature(5, "mds uses versioned encoding")
class MDSMap {
public:
@@ -123,38 +125,16 @@ public:
entity_inst_t get_inst() const { return entity_inst_t(entity_name_t::MDS(rank), addr); }
- void encode(bufferlist& bl) const {
- __u8 v = 3;
- ::encode(v, bl);
- ::encode(global_id, bl);
- ::encode(name, bl);
- ::encode(rank, bl);
- ::encode(inc, bl);
- ::encode(state, bl);
- ::encode(state_seq, bl);
- ::encode(addr, bl);
- ::encode(laggy_since, bl);
- ::encode(standby_for_rank, bl);
- ::encode(standby_for_name, bl);
- ::encode(export_targets, bl);
- }
- void decode(bufferlist::iterator& bl) {
- __u8 v;
- ::decode(v, bl);
- ::decode(global_id, bl);
- ::decode(name, bl);
- ::decode(rank, bl);
- ::decode(inc, bl);
- ::decode(state, bl);
- ::decode(state_seq, bl);
- ::decode(addr, bl);
- ::decode(laggy_since, bl);
- ::decode(standby_for_rank, bl);
- ::decode(standby_for_name, bl);
- if (v >= 2)
- ::decode(export_targets, bl);
+ // Encode this mds_info_t into bl, choosing the wire format from the
+ // feature bits: the versioned encoding is used only when
+ // CEPH_FEATURE_MDSENC is set in features, otherwise the unversioned one.
+ void encode(bufferlist& bl, uint64_t features) const {
+   if (features & CEPH_FEATURE_MDSENC) {
+     encode_versioned(bl, features);
+   } else {
+     encode_unversioned(bl);
+   }
  }
+ void decode(bufferlist::iterator& p);
void dump(Formatter *f) const;
+ static void generate_test_instances(list<mds_info_t*>& ls);
+ private:
+ void encode_versioned(bufferlist& bl, uint64_t features) const;
+ void encode_unversioned(bufferlist& bl) const;
};
@@ -256,6 +236,16 @@ public:
assert(up.count(m) && mds_info.count(up[m]));
return mds_info[up[m]];
}
+ /**
+  * Look up the gid of the first mds_info entry whose name equals s.
+  *
+  * Performs a linear scan over mds_info in key order and does not
+  * modify the map.
+  *
+  * @param s  daemon name to search for
+  * @return the matching entry's key (gid), or 0 if no entry has that name
+  *
+  * NOTE(review): using 0 as the "not found" value presumes 0 is never a
+  * valid gid -- confirm against how gids are allocated.
+  */
+ uint64_t find_mds_gid_by_name(const string& s) const {
+   for (map<uint64_t,mds_info_t>::const_iterator p = mds_info.begin();
+        p != mds_info.end();
+        ++p) {
+     if (p->second.name == s) {
+       return p->first;
+     }
+   }
+   return 0;
+ }
// counts
unsigned get_num_in_mds() {
@@ -465,7 +455,7 @@ public:
failed.empty();
}
bool is_stopped() {
- return up.size() == 0;
+ return up.empty();
}
// inst
@@ -504,112 +494,8 @@ public:
return mds_info[gid].inc;
return -1;
}
-
- void encode_client_old(bufferlist& bl) const {
- __u16 v = 2;
- ::encode(v, bl);
- ::encode(epoch, bl);
- ::encode(flags, bl);
- ::encode(last_failure, bl);
- ::encode(root, bl);
- ::encode(session_timeout, bl);
- ::encode(session_autoclose, bl);
- ::encode(max_file_size, bl);
- ::encode(max_mds, bl);
- ::encode(mds_info, bl);
- __u32 n = data_pools.size();
- ::encode(n, bl);
- for (set<int64_t>::const_iterator p = data_pools.begin(); p != data_pools.end(); ++p) {
- n = *p;
- ::encode(n, bl);
- }
- int32_t m = cas_pool;
- ::encode(m, bl);
- }
- void encode(bufferlist& bl) const {
- __u16 v = 3;
- ::encode(v, bl);
- ::encode(epoch, bl);
- ::encode(flags, bl);
- ::encode(last_failure, bl);
- ::encode(root, bl);
- ::encode(session_timeout, bl);
- ::encode(session_autoclose, bl);
- ::encode(max_file_size, bl);
- ::encode(max_mds, bl);
- ::encode(mds_info, bl);
- ::encode(data_pools, bl);
- ::encode(cas_pool, bl);
-
- // kclient ignores everything from here
- __u16 ev = 5;
- ::encode(ev, bl);
- ::encode(compat, bl);
- ::encode(metadata_pool, bl);
- ::encode(created, bl);
- ::encode(modified, bl);
- ::encode(tableserver, bl);
- ::encode(in, bl);
- ::encode(inc, bl);
- ::encode(up, bl);
- ::encode(failed, bl);
- ::encode(stopped, bl);
- ::encode(last_failure_osd_epoch, bl);
- }
- void decode(bufferlist::iterator& p) {
- __u16 v;
- ::decode(v, p);
- ::decode(epoch, p);
- ::decode(flags, p);
- ::decode(last_failure, p);
- ::decode(root, p);
- ::decode(session_timeout, p);
- ::decode(session_autoclose, p);
- ::decode(max_file_size, p);
- ::decode(max_mds, p);
- ::decode(mds_info, p);
- if (v < 3) {
- __u32 n;
- ::decode(n, p);
- while (n--) {
- __u32 m;
- ::decode(m, p);
- data_pools.insert(m);
- }
- __s32 s;
- ::decode(s, p);
- cas_pool = s;
- } else {
- ::decode(data_pools, p);
- ::decode(cas_pool, p);
- }
-
- // kclient ignores everything from here
- __u16 ev = 1;
- if (v >= 2)
- ::decode(ev, p);
- if (ev >= 3)
- ::decode(compat, p);
- else
- compat = get_mdsmap_compat_set_base();
- if (ev < 5) {
- __u32 n;
- ::decode(n, p);
- metadata_pool = n;
- } else {
- ::decode(metadata_pool, p);
- }
- ::decode(created, p);
- ::decode(modified, p);
- ::decode(tableserver, p);
- ::decode(in, p);
- ::decode(inc, p);
- ::decode(up, p);
- ::decode(failed, p);
- ::decode(stopped, p);
- if (ev >= 4)
- ::decode(last_failure_osd_epoch, p);
- }
+ void encode(bufferlist& bl, uint64_t features) const;
+ void decode(bufferlist::iterator& p);
void decode(bufferlist& bl) {
bufferlist::iterator p = bl.begin();
decode(p);
@@ -620,9 +506,10 @@ public:
void print_summary(ostream& out);
void dump(Formatter *f) const;
+ static void generate_test_instances(list<MDSMap*>& ls);
};
-WRITE_CLASS_ENCODER(MDSMap::mds_info_t)
-WRITE_CLASS_ENCODER(MDSMap)
+WRITE_CLASS_ENCODER_FEATURES(MDSMap::mds_info_t)
+WRITE_CLASS_ENCODER_FEATURES(MDSMap)
inline ostream& operator<<(ostream& out, MDSMap& m) {
m.print_summary(out);
diff --git a/src/mds/MDSTableServer.cc b/src/mds/MDSTableServer.cc
index 7175bbb3cfe..6eadd82a500 100644
--- a/src/mds/MDSTableServer.cc
+++ b/src/mds/MDSTableServer.cc
@@ -156,7 +156,7 @@ void MDSTableServer::handle_mds_recovery(int who)
dout(7) << "handle_mds_recovery mds." << who << dendl;
// resend agrees for recovered mds
- for (map<version_t,_pending>::iterator p = pending_for_mds.begin();
+ for (map<version_t,mds_table_pending_t>::iterator p = pending_for_mds.begin();
p != pending_for_mds.end();
p++) {
if (who >= 0 && p->second.mds != who)
diff --git a/src/mds/MDSTableServer.h b/src/mds/MDSTableServer.h
index 1467263d8b7..26cd5944844 100644
--- a/src/mds/MDSTableServer.h
+++ b/src/mds/MDSTableServer.h
@@ -22,31 +22,7 @@ class MMDSTableRequest;
class MDSTableServer : public MDSTable {
public:
int table;
-
- /* mds's requesting any pending ops. child needs to encodig the corresponding
- * pending mutation state in the table.
- */
- struct _pending {
- uint64_t reqid;
- __s32 mds;
- version_t tid;
- void encode(bufferlist& bl) const {
- __u8 struct_v = 1;
- ::encode(struct_v, bl);
- ::encode(reqid, bl);
- ::encode(mds, bl);
- ::encode(tid, bl);
- }
- void decode(bufferlist::iterator& bl) {
- __u8 struct_v;
- ::decode(struct_v, bl);
- ::decode(reqid, bl);
- ::decode(mds, bl);
- ::decode(tid, bl);
- }
- };
- WRITE_CLASS_ENCODER(_pending)
- map<version_t,_pending> pending_for_mds; // ** child should encode this! **
+ map<version_t,mds_table_pending_t> pending_for_mds; // ** child should encode this! **
private:
@@ -117,6 +93,5 @@ private:
void finish_recovery();
void handle_mds_recovery(int who);
};
-WRITE_CLASS_ENCODER(MDSTableServer::_pending)
#endif
diff --git a/src/mds/Migrator.cc b/src/mds/Migrator.cc
index 3449306d64a..123986908a1 100644
--- a/src/mds/Migrator.cc
+++ b/src/mds/Migrator.cc
@@ -27,7 +27,6 @@
#include "include/filepath.h"
-#include "events/EString.h"
#include "events/EExport.h"
#include "events/EImportStart.h"
#include "events/EImportFinish.h"
diff --git a/src/mds/Server.cc b/src/mds/Server.cc
index 54585128eda..ac51e60d0a9 100644
--- a/src/mds/Server.cc
+++ b/src/mds/Server.cc
@@ -42,7 +42,6 @@
#include "messages/MDentryUnlink.h"
-#include "events/EString.h"
#include "events/EUpdate.h"
#include "events/ESlaveUpdate.h"
#include "events/ESession.h"
@@ -159,7 +158,7 @@ Session *Server::get_session(Message *m)
{
Session *session = (Session *)m->get_connection()->get_priv();
if (session) {
- dout(20) << "get_session have " << session << " " << session->inst
+ dout(20) << "get_session have " << session << " " << session->info.inst
<< " state " << session->get_state_name() << dendl;
session->put(); // not carry ref
} else {
@@ -261,7 +260,7 @@ void Server::handle_client_session(MClientSession *m)
void Server::_session_logged(Session *session, uint64_t state_seq, bool open, version_t pv,
interval_set<inodeno_t>& inos, version_t piv)
{
- dout(10) << "_session_logged " << session->inst << " state_seq " << state_seq << " " << (open ? "open":"close")
+ dout(10) << "_session_logged " << session->info.inst << " state_seq " << state_seq << " " << (open ? "open":"close")
<< " " << pv << dendl;
if (piv) {
@@ -286,7 +285,7 @@ void Server::_session_logged(Session *session, uint64_t state_seq, bool open, ve
Capability *cap = session->caps.front();
CInode *in = cap->get_inode();
dout(20) << " killing capability " << ccap_string(cap->issued()) << " on " << *in << dendl;
- mds->locker->remove_client_cap(in, session->inst.name.num());
+ mds->locker->remove_client_cap(in, session->info.inst.name.num());
}
while (!session->leases.empty()) {
ClientLease *r = session->leases.front();
@@ -302,7 +301,7 @@ void Server::_session_logged(Session *session, uint64_t state_seq, bool open, ve
session->clear();
} else if (session->is_killing()) {
// destroy session, close connection
- mds->messenger->mark_down(session->inst.addr);
+ mds->messenger->mark_down(session->info.inst.addr);
mds->sessionmap.remove_session(session);
} else {
assert(0);
@@ -353,9 +352,9 @@ void Server::finish_force_open_sessions(map<client_t,entity_inst_t>& cm,
if (sseqmap.count(p->first)) {
uint64_t sseq = sseqmap[p->first];
if (session->get_state_seq() != sseq) {
- dout(10) << "force_open_sessions skipping changed " << session->inst << dendl;
+ dout(10) << "force_open_sessions skipping changed " << session->info.inst << dendl;
} else {
- dout(10) << "force_open_sessions opened " << session->inst << dendl;
+ dout(10) << "force_open_sessions opened " << session->info.inst << dendl;
mds->sessionmap.set_state(session, Session::STATE_OPEN);
mds->sessionmap.touch_session(session);
Message *m = new MClientSession(CEPH_SESSION_OPEN);
@@ -365,7 +364,7 @@ void Server::finish_force_open_sessions(map<client_t,entity_inst_t>& cm,
session->preopen_out_queue.push_back(m);
}
} else {
- dout(10) << "force_open_sessions skipping already-open " << session->inst << dendl;
+ dout(10) << "force_open_sessions skipping already-open " << session->info.inst << dendl;
assert(session->is_open() || session->is_stale());
}
session->dec_importing();
@@ -415,14 +414,14 @@ void Server::find_idle_sessions()
while (1) {
Session *session = mds->sessionmap.get_oldest_session(Session::STATE_OPEN);
if (!session) break;
- dout(20) << "laggiest active session is " << session->inst << dendl;
+ dout(20) << "laggiest active session is " << session->info.inst << dendl;
if (session->last_cap_renew >= cutoff) {
- dout(20) << "laggiest active session is " << session->inst << " and sufficiently new ("
+ dout(20) << "laggiest active session is " << session->info.inst << " and sufficiently new ("
<< session->last_cap_renew << ")" << dendl;
break;
}
- dout(10) << "new stale session " << session->inst << " last " << session->last_cap_renew << dendl;
+ dout(10) << "new stale session " << session->info.inst << " last " << session->last_cap_renew << dendl;
mds->sessionmap.set_state(session, Session::STATE_STALE);
mds->locker->revoke_stale_caps(session);
mds->locker->remove_stale_leases(session);
@@ -445,21 +444,21 @@ void Server::find_idle_sessions()
if (!session)
break;
if (session->is_importing()) {
- dout(10) << "stopping at importing session " << session->inst << dendl;
+ dout(10) << "stopping at importing session " << session->info.inst << dendl;
break;
}
assert(session->is_stale());
if (session->last_cap_renew >= cutoff) {
- dout(20) << "oldest stale session is " << session->inst << " and sufficiently new ("
+ dout(20) << "oldest stale session is " << session->info.inst << " and sufficiently new ("
<< session->last_cap_renew << ")" << dendl;
break;
}
utime_t age = now;
age -= session->last_cap_renew;
- mds->clog.info() << "closing stale session " << session->inst
+ mds->clog.info() << "closing stale session " << session->info.inst
<< " after " << age << "\n";
- dout(10) << "autoclosing stale session " << session->inst << " last " << session->last_cap_renew << dendl;
+ dout(10) << "autoclosing stale session " << session->info.inst << " last " << session->last_cap_renew << dendl;
kill_session(session);
}
}
@@ -490,7 +489,7 @@ void Server::journal_close_session(Session *session, int state)
// release alloc and pending-alloc inos for this session
// and wipe out session state, in case the session close aborts for some reason
interval_set<inodeno_t> both;
- both.swap(session->prealloc_inos);
+ both.swap(session->info.prealloc_inos);
both.insert(session->pending_prealloc_inos);
session->pending_prealloc_inos.clear();
if (both.size()) {
@@ -499,7 +498,7 @@ void Server::journal_close_session(Session *session, int state)
} else
piv = 0;
- mdlog->start_submit_entry(new ESession(session->inst, false, pv, both, piv),
+ mdlog->start_submit_entry(new ESession(session->info.inst, false, pv, both, piv),
new C_MDS_session_finish(mds, session, sseq, false, pv, both, piv));
mdlog->flush();
@@ -569,13 +568,13 @@ void Server::handle_client_reconnect(MClientReconnect *m)
mds->sessionmap.set_state(session, Session::STATE_OPENING);
version_t pv = ++mds->sessionmap.projected;
uint64_t sseq = session->get_state_seq();
- mdlog->start_submit_entry(new ESession(session->inst, true, pv),
+ mdlog->start_submit_entry(new ESession(session->info.inst, true, pv),
new C_MDS_session_finish(mds, session, sseq, true, pv));
mdlog->flush();
- mds->clog.debug() << "reconnect by new " << session->inst
+ mds->clog.debug() << "reconnect by new " << session->info.inst
<< " after " << delay << "\n";
} else {
- mds->clog.debug() << "reconnect by " << session->inst
+ mds->clog.debug() << "reconnect by " << session->info.inst
<< " after " << delay << "\n";
}
@@ -675,7 +674,7 @@ void Server::reconnect_tick()
p != client_reconnect_gather.end();
p++) {
Session *session = mds->sessionmap.get_session(entity_name_t::CLIENT(p->v));
- dout(1) << "reconnect gave up on " << session->inst << dendl;
+ dout(1) << "reconnect gave up on " << session->info.inst << dendl;
failed_reconnects++;
}
client_reconnect_gather.clear();
@@ -723,10 +722,10 @@ void Server::recall_client_state(float ratio)
++p) {
Session *session = *p;
if (!session->is_open() ||
- !session->inst.name.is_client())
+ !session->info.inst.name.is_client())
continue;
- dout(10) << " session " << session->inst
+ dout(10) << " session " << session->info.inst
<< " caps " << session->caps.size()
<< ", leases " << session->leases.size()
<< dendl;
@@ -837,7 +836,7 @@ void Server::early_reply(MDRequest *mdr, CInode *tracei, CDentry *tracedn)
mdr->cap_releases.erase(tracedn->get_dir()->get_inode()->vino());
set_trace_dist(mdr->session, reply, tracei, tracedn, mdr->snapid,
- mdr->client_request->get_dentry_wanted());
+ mdr->client_request->get_dentry_wanted(), req->may_write());
}
reply->set_extra_bl(mdr->reply_extra_bl);
@@ -918,7 +917,8 @@ void Server::reply_request(MDRequest *mdr, MClientReply *reply, CInode *tracei,
mdcache->try_reconnect_cap(tracei, session);
} else {
// include metadata in reply
- set_trace_dist(session, reply, tracei, tracedn, snapid, dentry_wanted);
+ set_trace_dist(session, reply, tracei, tracedn,
+ snapid, dentry_wanted, req->may_write());
}
}
@@ -975,8 +975,16 @@ void Server::encode_null_lease(bufferlist& bl)
void Server::set_trace_dist(Session *session, MClientReply *reply,
CInode *in, CDentry *dn,
snapid_t snapid,
- int dentry_wanted)
+ int dentry_wanted,
+ bool modified)
{
+ // skip doing this for debugging purposes?
+ if (modified && g_conf->mds_inject_traceless_reply_probability &&
+ (rand() % 10000 < g_conf->mds_inject_traceless_reply_probability * 10000.0)) {
+ dout(5) << "deliberately skipping trace for " << *reply << dendl;
+ return;
+ }
+
// inode, dentry, dir, ..., inode
bufferlist bl;
int whoami = mds->get_nodeid();
@@ -1744,13 +1752,13 @@ CInode* Server::prepare_new_inode(MDRequest *mdr, CDir *dir, inodeno_t useino, u
CInode *in = new CInode(mdcache);
// assign ino
- if (mdr->session->prealloc_inos.size()) {
+ if (mdr->session->info.prealloc_inos.size()) {
mdr->used_prealloc_ino =
in->inode.ino = mdr->session->take_ino(useino); // prealloc -> used
mds->sessionmap.projected++;
dout(10) << "prepare_new_inode used_prealloc " << mdr->used_prealloc_ino
- << " (" << mdr->session->prealloc_inos
- << ", " << mdr->session->prealloc_inos.size() << " left)"
+ << " (" << mdr->session->info.prealloc_inos
+ << ", " << mdr->session->info.prealloc_inos.size() << " left)"
<< dendl;
} else {
mdr->alloc_ino =
@@ -1855,12 +1863,12 @@ void Server::apply_allocated_inos(MDRequest *mdr)
}
if (mdr->prealloc_inos.size()) {
session->pending_prealloc_inos.subtract(mdr->prealloc_inos);
- session->prealloc_inos.insert(mdr->prealloc_inos);
+ session->info.prealloc_inos.insert(mdr->prealloc_inos);
mds->sessionmap.version++;
mds->inotable->apply_alloc_ids(mdr->prealloc_inos);
}
if (mdr->used_prealloc_ino) {
- session->used_inos.erase(mdr->used_prealloc_ino);
+ session->info.used_inos.erase(mdr->used_prealloc_ino);
mds->sessionmap.version++;
}
}
@@ -3443,7 +3451,7 @@ void Server::handle_client_setdirlayout(MDRequest *mdr)
return;
// validate layout
- default_file_layout *layout = new default_file_layout;
+ file_layout_policy_t *layout = new file_layout_policy_t;
if (cur->get_projected_dir_layout())
layout->layout = *cur->get_projected_dir_layout();
else if (dir_layout)
@@ -3570,7 +3578,7 @@ void Server::handle_set_vxattr(MDRequest *mdr, CInode *cur,
return;
}
- default_file_layout *dlayout = new default_file_layout;
+ file_layout_policy_t *dlayout = new file_layout_policy_t;
if (cur->get_projected_dir_layout())
dlayout->layout = *cur->get_projected_dir_layout();
else if (dir_layout)
diff --git a/src/mds/Server.h b/src/mds/Server.h
index 79977fc8dd5..4bf3f8604eb 100644
--- a/src/mds/Server.h
+++ b/src/mds/Server.h
@@ -103,7 +103,7 @@ public:
void reply_request(MDRequest *mdr, MClientReply *reply, CInode *tracei = 0, CDentry *tracedn = 0);
void set_trace_dist(Session *session, MClientReply *reply, CInode *in, CDentry *dn,
snapid_t snapid,
- int num_dentries_wanted);
+ int num_dentries_wanted, bool modified);
void encode_empty_dirstat(bufferlist& bl);
void encode_infinite_lease(bufferlist& bl);
diff --git a/src/mds/SessionMap.cc b/src/mds/SessionMap.cc
index aee47d11c4d..53fe90c10ba 100644
--- a/src/mds/SessionMap.cc
+++ b/src/mds/SessionMap.cc
@@ -33,9 +33,9 @@ void SessionMap::dump()
++p)
dout(10) << p->first << " " << p->second
<< " state " << p->second->get_state_name()
- << " completed " << p->second->completed_requests
- << " prealloc_inos " << p->second->prealloc_inos
- << " used_ions " << p->second->used_inos
+ << " completed " << p->second->info.completed_requests
+ << " prealloc_inos " << p->second->info.prealloc_inos
+ << " used_ions " << p->second->info.used_inos
<< dendl;
}
@@ -140,26 +140,26 @@ void SessionMap::_save_finish(version_t v)
// -------------------
-void SessionMap::encode(bufferlist& bl)
+void SessionMap::encode(bufferlist& bl) const
{
uint64_t pre = -1; // for 0.19 compatibility; we forgot an encoding prefix.
::encode(pre, bl);
- __u8 struct_v = 2;
- ::encode(struct_v, bl);
-
+ ENCODE_START(3, 3, bl);
::encode(version, bl);
- for (hash_map<entity_name_t,Session*>::iterator p = session_map.begin();
+ for (hash_map<entity_name_t,Session*>::const_iterator p = session_map.begin();
p != session_map.end();
- ++p)
+ ++p) {
if (p->second->is_open() ||
p->second->is_closing() ||
p->second->is_stale() ||
p->second->is_killing()) {
::encode(p->first, bl);
- p->second->encode(bl);
+ p->second->info.encode(bl);
}
+ }
+ ENCODE_FINISH(bl);
}
void SessionMap::decode(bufferlist::iterator& p)
@@ -168,21 +168,21 @@ void SessionMap::decode(bufferlist::iterator& p)
uint64_t pre;
::decode(pre, p);
if (pre == (uint64_t)-1) {
- __u8 struct_v;
- ::decode(struct_v, p);
- assert(struct_v == 2);
-
+ DECODE_START_LEGACY_COMPAT_LEN(3, 3, 3, p);
+ assert(struct_v >= 2);
+
::decode(version, p);
-
+
while (!p.end()) {
entity_inst_t inst;
::decode(inst.name, p);
Session *s = get_or_add_session(inst);
if (s->is_closed())
set_state(s, Session::STATE_OPEN);
- s->decode(p);
+ s->info.decode(p);
}
+ DECODE_FINISH(p);
} else {
// --- old format ----
version = pre;
@@ -194,17 +194,17 @@ void SessionMap::decode(bufferlist::iterator& p)
while (n-- && !p.end()) {
bufferlist::iterator p2 = p;
Session *s = new Session;
- s->decode(p);
- if (session_map.count(s->inst.name)) {
+ s->info.decode(p);
+ if (session_map.count(s->info.inst.name)) {
// eager client connected too fast! aie.
- dout(10) << " already had session for " << s->inst.name << ", recovering" << dendl;
- entity_name_t n = s->inst.name;
+ dout(10) << " already had session for " << s->info.inst.name << ", recovering" << dendl;
+ entity_name_t n = s->info.inst.name;
delete s;
s = session_map[n];
p = p2;
- s->decode(p);
+ s->info.decode(p);
} else {
- session_map[s->inst.name] = s;
+ session_map[s->info.inst.name] = s;
}
set_state(s, Session::STATE_OPEN);
s->last_cap_renew = now;
@@ -212,7 +212,29 @@ void SessionMap::decode(bufferlist::iterator& p)
}
}
+/**
+ * Dump the session map through a Formatter.
+ *
+ * Emits one "Sessions" array holding a "Session" object per map entry;
+ * each of those contains the entry's entity name and the session's
+ * info.  No filtering by session state is applied here.
+ *
+ * @param f  formatter that receives the sections
+ */
+void SessionMap::dump(Formatter *f) const
+{
+  typedef hash_map<entity_name_t,Session*>::const_iterator session_iter;
+
+  f->open_array_section("Sessions");
+  for (session_iter it = session_map.begin(); it != session_map.end(); ++it) {
+    f->open_object_section("Session");
+
+    f->open_object_section("entity name");
+    it->first.dump(f);
+    f->close_section(); // entity name
+
+    f->open_object_section("Session info");
+    it->second->info.dump(f);
+    f->close_section(); // Session info
+
+    f->close_section(); // Session
+  }
+  f->close_section(); // Sessions
+}
+/**
+ * Append sample SessionMap instances to ls.
+ *
+ * Currently only a single instance built with a NULL MDS pointer is
+ * produced.  The instance is heap-allocated with new and not freed here.
+ */
+void SessionMap::generate_test_instances(list<SessionMap*>& ls)
+{
+  // pretty boring for now: one map with no MDS attached
+  SessionMap *boring = new SessionMap(NULL);
+  ls.push_back(boring);
+}
void SessionMap::wipe()
{
@@ -234,8 +256,8 @@ void SessionMap::wipe_ino_prealloc()
p != session_map.end();
++p) {
p->second->pending_prealloc_inos.clear();
- p->second->prealloc_inos.clear();
- p->second->used_inos.clear();
+ p->second->info.prealloc_inos.clear();
+ p->second->info.used_inos.clear();
}
projected = ++version;
}
diff --git a/src/mds/SessionMap.h b/src/mds/SessionMap.h
index 759454b1873..702a0b5dec8 100644
--- a/src/mds/SessionMap.h
+++ b/src/mds/SessionMap.h
@@ -82,7 +82,8 @@ private:
int importing_count;
friend class SessionMap;
public:
- entity_inst_t inst;
+ session_info_t info; ///< durable bits
+
Connection *connection;
xlist<Session*>::item item_session_list;
@@ -91,35 +92,35 @@ public:
elist<MDRequest*> requests;
interval_set<inodeno_t> pending_prealloc_inos; // journaling prealloc, will be added to prealloc_inos
- interval_set<inodeno_t> prealloc_inos; // preallocated, ready to use.
- interval_set<inodeno_t> used_inos; // journaling use
inodeno_t next_ino() {
- if (prealloc_inos.empty())
+ if (info.prealloc_inos.empty())
return 0;
- return prealloc_inos.range_start();
+ return info.prealloc_inos.range_start();
}
inodeno_t take_ino(inodeno_t ino = 0) {
- assert(!prealloc_inos.empty());
+ assert(!info.prealloc_inos.empty());
if (ino) {
- if (prealloc_inos.contains(ino))
- prealloc_inos.erase(ino);
+ if (info.prealloc_inos.contains(ino))
+ info.prealloc_inos.erase(ino);
else
ino = 0;
}
if (!ino) {
- ino = prealloc_inos.range_start();
- prealloc_inos.erase(ino);
+ ino = info.prealloc_inos.range_start();
+ info.prealloc_inos.erase(ino);
}
- used_inos.insert(ino, 1);
+ info.used_inos.insert(ino, 1);
return ino;
}
int get_num_projected_prealloc_inos() {
- return prealloc_inos.size() + pending_prealloc_inos.size();
+ return info.prealloc_inos.size() + pending_prealloc_inos.size();
}
- client_t get_client() { return client_t(inst.name.num()); }
+ client_t get_client() {
+ return info.get_client();
+ }
int get_state() { return state; }
const char *get_state_name() { return get_state_name(state); }
@@ -164,20 +165,20 @@ public:
// -- completed requests --
private:
- set<tid_t> completed_requests;
+
public:
void add_completed_request(tid_t t) {
- completed_requests.insert(t);
+ info.completed_requests.insert(t);
}
void trim_completed_requests(tid_t mintid) {
// trim
- while (!completed_requests.empty() &&
- (mintid == 0 || *completed_requests.begin() < mintid))
- completed_requests.erase(completed_requests.begin());
+ while (!info.completed_requests.empty() &&
+ (mintid == 0 || *info.completed_requests.begin() < mintid))
+ info.completed_requests.erase(info.completed_requests.begin());
}
bool have_completed_request(tid_t tid) const {
- return completed_requests.count(tid);
+ return info.completed_requests.count(tid);
}
@@ -197,35 +198,14 @@ public:
void clear() {
pending_prealloc_inos.clear();
- prealloc_inos.clear();
- used_inos.clear();
+ info.clear_meta();
cap_push_seq = 0;
last_cap_renew = utime_t();
- completed_requests.clear();
}
- void encode(bufferlist& bl) const {
- __u8 v = 1;
- ::encode(v, bl);
- ::encode(inst, bl);
- ::encode(completed_requests, bl);
- ::encode(prealloc_inos, bl); // hacky, see below.
- ::encode(used_inos, bl);
- }
- void decode(bufferlist::iterator& p) {
- __u8 v;
- ::decode(v, p);
- ::decode(inst, p);
- ::decode(completed_requests, p);
- ::decode(prealloc_inos, p);
- ::decode(used_inos, p);
- prealloc_inos.insert(used_inos);
- used_inos.clear();
- }
};
-WRITE_CLASS_ENCODER(Session)
/*
* session map
@@ -248,6 +228,10 @@ public:
SessionMap(MDS *m) : mds(m),
version(0), projected(0), committing(0), committed(0)
{ }
+
+ // Default constructor for the dencoder: no MDS attached and every
+ // version counter starts at zero.
+ SessionMap()
+   : mds(NULL),
+     version(0), projected(0),
+     committing(0), committed(0)
+ { }
// sessions
bool empty() { return session_map.empty(); }
@@ -282,13 +266,13 @@ public:
s = session_map[i.name];
else
s = session_map[i.name] = new Session;
- s->inst = i;
+ s->info.inst = i;
s->last_cap_renew = ceph_clock_now(g_ceph_context);
return s;
}
void add_session(Session *s) {
- assert(session_map.count(s->inst.name) == 0);
- session_map[s->inst.name] = s;
+ assert(session_map.count(s->info.inst.name) == 0);
+ session_map[s->info.inst.name] = s;
if (by_state.count(s->state) == 0)
by_state[s->state] = new xlist<Session*>;
by_state[s->state]->push_back(&s->item_session_list);
@@ -297,7 +281,7 @@ public:
void remove_session(Session *s) {
s->trim_completed_requests(0);
s->item_session_list.remove_myself();
- session_map.erase(s->inst.name);
+ session_map.erase(s->info.inst.name);
s->put();
}
void touch_session(Session *session) {
@@ -331,14 +315,14 @@ public:
for (hash_map<entity_name_t,Session*>::iterator p = session_map.begin();
p != session_map.end();
p++)
- if (p->second->inst.name.is_client())
- s.insert(p->second->inst.name.num());
+ if (p->second->info.inst.name.is_client())
+ s.insert(p->second->info.inst.name.num());
}
void get_client_session_set(set<Session*>& s) {
for (hash_map<entity_name_t,Session*>::iterator p = session_map.begin();
p != session_map.end();
p++)
- if (p->second->inst.name.is_client())
+ if (p->second->info.inst.name.is_client())
s.insert(p->second);
}
@@ -355,7 +339,7 @@ public:
// helpers
entity_inst_t& get_inst(entity_name_t w) {
assert(session_map.count(w));
- return session_map[w]->inst;
+ return session_map[w]->info.inst;
}
version_t inc_push_seq(client_t client) {
return get_session(entity_name_t::CLIENT(client.v))->inc_push_seq();
@@ -387,8 +371,10 @@ public:
inodeno_t ino;
list<Context*> waiting_for_load;
- void encode(bufferlist& bl);
+ void encode(bufferlist& bl) const;
void decode(bufferlist::iterator& blp);
+ void dump(Formatter *f) const;
+ static void generate_test_instances(list<SessionMap*>& ls);
object_t get_object_name();
diff --git a/src/mds/SimpleLock.h b/src/mds/SimpleLock.h
index 8eb813469e4..0eff040845f 100644
--- a/src/mds/SimpleLock.h
+++ b/src/mds/SimpleLock.h
@@ -544,22 +544,22 @@ public:
// encode/decode
void encode(bufferlist& bl) const {
- __u8 struct_v = 1;
- ::encode(struct_v, bl);
+ ENCODE_START(2, 2, bl);
::encode(state, bl);
if (have_more())
::encode(more()->gather_set, bl);
else
::encode(empty_gather_set, bl);
+ ENCODE_FINISH(bl);
}
void decode(bufferlist::iterator& p) {
- __u8 struct_v;
- ::decode(struct_v, p);
+ DECODE_START(2, p);
::decode(state, p);
set<int> g;
::decode(g, p);
if (!g.empty())
more()->gather_set.swap(g);
+ DECODE_FINISH(p);
}
void encode_state_for_replica(bufferlist& bl) const {
__s16 s = get_replica_state();
diff --git a/src/mds/SnapRealm.cc b/src/mds/SnapRealm.cc
new file mode 100644
index 00000000000..cc9fda76138
--- /dev/null
+++ b/src/mds/SnapRealm.cc
@@ -0,0 +1,488 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include "SnapRealm.h"
+#include "MDCache.h"
+#include "MDS.h"
+
+#include "messages/MClientSnap.h"
+
+
+/*
+ * SnapRealm
+ */
+
+#define dout_subsys ceph_subsys_mds
+#undef dout_prefix
+#define dout_prefix _prefix(_dout, mdcache->mds->get_nodeid(), inode, srnode.seq, this)
+// Write the debug-log prefix used by dout() in this file:
+// " mds.<whoami>.cache.snaprealm(<ino> seq <seq> <realm address>) ".
+static ostream& _prefix(std::ostream *_dout, int whoami, CInode *inode,
+                        uint64_t seq, SnapRealm *realm) {
+  std::ostream& os = *_dout;
+  os << " mds." << whoami;
+  os << ".cache.snaprealm(" << inode->ino();
+  os << " seq " << seq << " " << realm << ") ";
+  return os;
+}
+
+// Print a one-line summary of a realm: inode number, seq, last_created
+// ("lc"), created ("cr"), current_parent_since ("cps", only when it differs
+// from created), the realm's own snaps, any past-parent links formatted as
+// "first-last=ino", and finally the realm's address.
+ostream& operator<<(ostream& out, const SnapRealm& realm)
+{
+  const sr_t& sr = realm.srnode;
+
+  out << "snaprealm(" << realm.inode->ino()
+      << " seq " << sr.seq
+      << " lc " << sr.last_created
+      << " cr " << sr.created;
+  if (sr.created != sr.current_parent_since)
+    out << " cps " << sr.current_parent_since;
+  out << " snaps=" << sr.snaps;
+
+  if (!sr.past_parents.empty()) {
+    out << " past_parents=(";
+    bool first_entry = true;
+    for (map<snapid_t, snaplink_t>::const_iterator it = sr.past_parents.begin();
+         it != sr.past_parents.end();
+         ++it) {
+      if (!first_entry)
+        out << ",";
+      first_entry = false;
+      out << it->second.first << "-" << it->first
+          << "=" << it->second.ino;
+    }
+    out << ")";
+  }
+
+  out << " " << &realm << ")";
+  return out;
+}
+
+
+
+
+// Remember 'parent' as an open past parent, keyed by its inode number, and
+// take a PIN_PASTSNAPPARENT reference on its inode.
+void SnapRealm::add_open_past_parent(SnapRealm *parent)
+{
+  CInode *pin = parent->inode;
+  open_past_parents[pin->ino()] = parent;
+  pin->get(CInode::PIN_PASTSNAPPARENT);
+}
+
+/*
+ * Try to open every realm this realm depends on for snaps in [first,last]:
+ * the current parent (recursively) and all past parents.
+ *
+ * Returns true once everything is open, and latches 'open' so later calls
+ * return immediately.  Returns false if a past parent's inode is not in
+ * cache; in that case open_remote_ino() has been called for it with
+ * 'finish', or a recursive call returned false.
+ */
+bool SnapRealm::_open_parents(Context *finish, snapid_t first, snapid_t last)
+{
+  dout(10) << "open_parents [" << first << "," << last << "]" << dendl;
+  if (open)
+    return true;
+
+  // make sure my current parents' parents are open...
+  if (parent) {
+    dout(10) << " current parent [" << srnode.current_parent_since << ",head] is " << *parent
+             << " on " << *parent->inode << dendl;
+    if (last >= srnode.current_parent_since &&
+        !parent->_open_parents(finish, MAX(first, srnode.current_parent_since), last))
+      return false;
+  }
+
+  // and my past parents too!
+  assert(srnode.past_parents.size() >= open_past_parents.size());
+  if (srnode.past_parents.size() > open_past_parents.size()) {
+    for (map<snapid_t, snaplink_t>::iterator p = srnode.past_parents.begin();
+         p != srnode.past_parents.end();
+         ++p) {
+      const snaplink_t& link = p->second;
+      dout(10) << " past_parent [" << link.first << "," << p->first << "] is "
+               << link.ino << dendl;
+      // Deliberately not called 'parent': that name is the member holding
+      // the current parent realm, used above.
+      CInode *past_in = mdcache->get_inode(link.ino);
+      if (!past_in) {
+        mdcache->open_remote_ino(link.ino, finish);
+        return false;
+      }
+      assert(past_in->snaprealm);  // hmm!
+      if (!open_past_parents.count(link.ino)) {
+        add_open_past_parent(past_in->snaprealm);
+      }
+      if (!past_in->snaprealm->_open_parents(finish, link.first, p->first))
+        return false;
+    }
+  }
+
+  open = true;
+  return true;
+}
+
+/*
+ * Check, without opening anything, whether every past parent whose interval
+ * intersects [first,last] is present in open_past_parents and itself has
+ * its past parents open for the overlapping range.  On success 'open' is
+ * latched to true.
+ */
+bool SnapRealm::have_past_parents_open(snapid_t first, snapid_t last)
+{
+  dout(10) << "have_past_parents_open [" << first << "," << last << "]" << dendl;
+  if (open)
+    return true;
+
+  map<snapid_t, snaplink_t>::iterator it = srnode.past_parents.lower_bound(first);
+  while (it != srnode.past_parents.end() && it->second.first <= last) {
+    const snaplink_t& link = it->second;
+    dout(10) << " past parent [" << link.first << "," << it->first << "] was "
+             << link.ino << dendl;
+
+    map<inodeno_t,SnapRealm*>::iterator found = open_past_parents.find(link.ino);
+    if (found == open_past_parents.end()) {
+      dout(10) << " past parent " << link.ino << " is not open" << dendl;
+      return false;
+    }
+
+    SnapRealm *past = found->second;
+    if (!past->have_past_parents_open(MAX(first, link.first),
+                                      MIN(last, it->first)))
+      return false;
+    ++it;
+  }
+
+  open = true;
+  return true;
+}
+
+// Drop the PIN_PASTSNAPPARENT pin held on each open past parent's inode,
+// then forget all open past parents.
+void SnapRealm::close_parents()
+{
+  map<inodeno_t,SnapRealm*>::iterator it = open_past_parents.begin();
+  while (it != open_past_parents.end()) {
+    SnapRealm *past = it->second;
+    past->inode->put(CInode::PIN_PASTSNAPPARENT);
+    ++it;
+  }
+  open_past_parents.clear();
+}
+
+
+/*
+ * Collect into 's' every snapid in [first,last] that applies to this realm:
+ * our own snaps, plus the snaps of each past parent and of the current
+ * parent, restricted to the interval during which that realm was our parent.
+ *
+ * max_seq, max_last_created and max_last_destroyed are raised to this
+ * realm's srnode values when those are larger; the recursive calls do the
+ * same for every realm visited.
+ *
+ * Past parents must already be in cache (see the asserts).
+ */
+void SnapRealm::build_snap_set(set<snapid_t> &s,
+                               snapid_t& max_seq, snapid_t& max_last_created, snapid_t& max_last_destroyed,
+                               snapid_t first, snapid_t last)
+{
+  dout(10) << "build_snap_set [" << first << "," << last << "] on " << *this << dendl;
+
+  if (max_seq < srnode.seq)
+    max_seq = srnode.seq;
+  if (max_last_created < srnode.last_created)
+    max_last_created = srnode.last_created;
+  if (max_last_destroyed < srnode.last_destroyed)
+    max_last_destroyed = srnode.last_destroyed;
+
+  // my own snaps that fall inside [first,last]
+  map<snapid_t, SnapInfo>::iterator mine = srnode.snaps.lower_bound(first);
+  while (mine != srnode.snaps.end() && mine->first <= last) {
+    s.insert(mine->first);
+    ++mine;
+  }
+
+  // snaps of past parents whose interval intersects [first,last]
+  for (map<snapid_t, snaplink_t>::iterator pp = srnode.past_parents.lower_bound(first);
+       pp != srnode.past_parents.end() && pp->first >= first && pp->second.first <= last;
+       ++pp) {
+    CInode *oldparent = mdcache->get_inode(pp->second.ino);
+    assert(oldparent);  // call open_parents first!
+    assert(oldparent->snaprealm);
+    snapid_t lo = MAX(first, pp->second.first);
+    snapid_t hi = MIN(last, pp->first);
+    oldparent->snaprealm->build_snap_set(s, max_seq, max_last_created, max_last_destroyed,
+                                         lo, hi);
+  }
+
+  // and the current parent, from current_parent_since onward
+  if (parent && srnode.current_parent_since <= last)
+    parent->build_snap_set(s, max_seq, max_last_created, max_last_destroyed,
+                           MAX(first, srnode.current_parent_since), last);
+}
+
+
+/*
+ * Rebuild the cached snap state (cached_snaps, cached_seq,
+ * cached_last_created, cached_last_destroyed, cached_snap_trace) if stale.
+ *
+ * The cache is treated as fresh while cached_seq >= srnode.seq;
+ * invalidate_cached_snaps() resets cached_seq to 0.
+ */
+void SnapRealm::check_cache()
+{
+ if (cached_seq >= srnode.seq)
+ return;
+
+ cached_snaps.clear();
+ // NOTE(review): get_snap_context() refills the context when its seq is 0,
+ // so clear() presumably zeroes seq -- confirm against SnapContext.
+ cached_snap_context.clear();
+
+ // seed the maxima from this realm; build_snap_set() takes them by
+ // reference and raises them for each realm it visits.
+ cached_last_created = srnode.last_created;
+ cached_last_destroyed = srnode.last_destroyed;
+ cached_seq = srnode.seq;
+ build_snap_set(cached_snaps, cached_seq, cached_last_created, cached_last_destroyed,
+ 0, CEPH_NOSNAP);
+
+ // re-encode the trace for this realm followed by its current ancestors
+ cached_snap_trace.clear();
+ build_snap_trace(cached_snap_trace);
+
+ dout(10) << "check_cache rebuilt " << cached_snaps
+ << " seq " << srnode.seq
+ << " cached_seq " << cached_seq
+ << " cached_last_created " << cached_last_created
+ << " cached_last_destroyed " << cached_last_destroyed
+ << ")" << dendl;
+}
+
+// Return the cached set of snapids that apply to this realm, refreshing the
+// cache first if it is stale.  The reference is to cached_snaps itself.
+const set<snapid_t>& SnapRealm::get_snaps()
+{
+  check_cache();
+
+  const set<snapid_t>& snaps = cached_snaps;
+  dout(10) << "get_snaps " << snaps
+           << " (seq " << srnode.seq << " cached_seq " << cached_seq << ")"
+           << dendl;
+  return snaps;
+}
+
+/*
+ * Return the cached SnapContext for this realm.  If its seq is still 0 it
+ * is (re)filled from the cache: seq comes from cached_seq and the snaps
+ * vector is cached_snaps in reverse sorted order (newest first).
+ */
+const SnapContext& SnapRealm::get_snap_context()
+{
+  check_cache();
+
+  if (cached_snap_context.seq)
+    return cached_snap_context;  // already filled in
+
+  cached_snap_context.seq = cached_seq;
+  // walking the ordered set backwards yields descending snapids
+  cached_snap_context.snaps.assign(cached_snaps.rbegin(), cached_snaps.rend());
+  return cached_snap_context;
+}
+
+/*
+ * Fill 'infomap' with a pointer to the SnapInfo of every snap in
+ * [first,last] visible from this realm: our own snaps, plus those of past
+ * parents and the current parent for the interval in which each was our
+ * parent.  The stored pointers refer to entries inside the owning realm's
+ * srnode.snaps map.
+ *
+ * Past parents must already be in cache (see the asserts).
+ */
+void SnapRealm::get_snap_info(map<snapid_t,SnapInfo*>& infomap, snapid_t first, snapid_t last)
+{
+  const set<snapid_t>& snaps = get_snaps();
+  dout(10) << "get_snap_info snaps " << snaps << dendl;
+
+  // my own snaps that fall inside [first,last]
+  map<snapid_t, SnapInfo>::iterator mine = srnode.snaps.lower_bound(first);
+  while (mine != srnode.snaps.end() && mine->first <= last) {
+    infomap[mine->first] = &mine->second;
+    ++mine;
+  }
+
+  // snaps of past parents whose interval intersects [first,last]
+  for (map<snapid_t, snaplink_t>::iterator pp = srnode.past_parents.lower_bound(first);
+       pp != srnode.past_parents.end() && pp->first >= first && pp->second.first <= last;
+       ++pp) {
+    CInode *oldparent = mdcache->get_inode(pp->second.ino);
+    assert(oldparent);  // call open_parents first!
+    assert(oldparent->snaprealm);
+    snapid_t lo = MAX(first, pp->second.first);
+    snapid_t hi = MIN(last, pp->first);
+    oldparent->snaprealm->get_snap_info(infomap, lo, hi);
+  }
+
+  // and the current parent, from current_parent_since onward
+  if (parent && srnode.current_parent_since <= last)
+    parent->get_snap_info(infomap, MAX(first, srnode.current_parent_since), last);
+}
+
+/*
+ * Return the name of snapshot 'snapid' as seen from directory 'atino'.
+ *
+ * If the snap belongs to this realm, the plain name is returned when
+ * 'atino' is this realm's inode and get_long_name() otherwise.  If not, the
+ * lookup is delegated to the past parent whose interval covers 'snapid',
+ * or failing that to the current parent (asserted to exist and to have
+ * been our parent at 'snapid').
+ */
+const string& SnapRealm::get_snapname(snapid_t snapid, inodeno_t atino)
+{
+  map<snapid_t, SnapInfo>::iterator mine = srnode.snaps.find(snapid);
+  if (mine != srnode.snaps.end()) {
+    SnapInfo& info = mine->second;
+    if (atino != inode->ino())
+      return info.get_long_name();
+    return info.name;
+  }
+
+  map<snapid_t,snaplink_t>::iterator pp = srnode.past_parents.lower_bound(snapid);
+  if (pp == srnode.past_parents.end() || pp->second.first > snapid) {
+    // no past parent covers snapid: it must come from the current parent
+    assert(srnode.current_parent_since <= snapid);
+    assert(parent);
+    return parent->get_snapname(snapid, atino);
+  }
+
+  CInode *oldparent = mdcache->get_inode(pp->second.ino);
+  assert(oldparent);  // call open_parents first!
+  assert(oldparent->snaprealm);
+  return oldparent->snaprealm->get_snapname(snapid, atino);
+}
+
+/*
+ * Map a snapshot name to its snapid, searching snaps in [first,last].
+ *
+ * When 'atino' is this realm's inode, 'n' is matched directly against the
+ * snap names.  Otherwise 'n' must have the form "_<name>_<ino>"; it is split
+ * at the first '_' after position 0 and matched against both the snap's
+ * name and its ino.  Our own snaps are tried first, then past parents for
+ * their intervals, then the current parent.
+ *
+ * Returns the snapid, or 0 if the name is malformed or not found.
+ * Past parents must already be in cache (see the asserts).
+ */
+snapid_t SnapRealm::resolve_snapname(const string& n, inodeno_t atino, snapid_t first, snapid_t last)
+{
+  // first try me
+  dout(10) << "resolve_snapname '" << n << "' in [" << first << "," << last << "]" << dendl;
+
+  bool actual = (atino == inode->ino());
+  string pname;
+  inodeno_t pino;  // only assigned and read when !actual
+  if (!actual) {
+    if (n.empty() || n[0] != '_')
+      return 0;
+    // keep find()'s result as size_t and compare with npos; narrowing it to
+    // int and testing "< 0" depends on implementation-defined conversion.
+    size_t next_ = n.find('_', 1);
+    if (next_ == string::npos)
+      return 0;
+    pname = n.substr(1, next_ - 1);
+    pino = atoll(n.c_str() + next_ + 1);
+    dout(10) << " " << n << " parses to name '" << pname << "' dirino " << pino << dendl;
+  }
+
+  for (map<snapid_t, SnapInfo>::iterator p = srnode.snaps.lower_bound(first);  // first element >= first
+       p != srnode.snaps.end() && p->first <= last;
+       ++p) {
+    dout(15) << " ? " << p->second << dendl;
+    if (actual && p->second.name == n)
+      return p->first;
+    if (!actual && p->second.name == pname && p->second.ino == pino)
+      return p->first;
+  }
+
+  // include snaps for parents during intervals that intersect [first,last]
+  for (map<snapid_t, snaplink_t>::iterator p = srnode.past_parents.lower_bound(first);
+       p != srnode.past_parents.end() && p->first >= first && p->second.first <= last;
+       ++p) {
+    CInode *oldparent = mdcache->get_inode(p->second.ino);
+    assert(oldparent);  // call open_parents first!
+    assert(oldparent->snaprealm);
+    snapid_t r = oldparent->snaprealm->resolve_snapname(n, atino,
+                                                        MAX(first, p->second.first),
+                                                        MIN(last, p->first));
+    if (r)
+      return r;
+  }
+  if (parent && srnode.current_parent_since <= last)
+    return parent->resolve_snapname(n, atino, MAX(first, srnode.current_parent_since), last);
+  return 0;
+}
+
+
+/*
+ * Re-derive this realm's parent from the inode's current parent dentry
+ * (the realm found via find_snaprealm() on the containing directory's
+ * inode).  If it changed, move ourselves between the old and new parent's
+ * open_children sets and invalidate the cached snaps.
+ */
+void SnapRealm::adjust_parent()
+{
+  SnapRealm *newparent = inode->get_parent_dn()->get_dir()->get_inode()->find_snaprealm();
+  if (newparent == parent)
+    return;  // nothing changed
+
+  dout(10) << "adjust_parent " << parent << " -> " << newparent << dendl;
+  if (parent)
+    parent->open_children.erase(this);
+  parent = newparent;
+  if (parent)
+    parent->open_children.insert(this);
+
+  invalidate_cached_snaps();
+}
+
+/*
+ * Hand over to the realm 'child' everything in this realm that belongs
+ * beneath child's inode.
+ *
+ * For a non-directory child only the child's own inode is moved (if it is
+ * in a realm at all).  For a directory child, every open child realm whose
+ * inode child's inode is a projected ancestor of is re-parented to 'child',
+ * and every inode in inodes_with_caps that is child's inode or lies beneath
+ * it is moved with move_to_realm(child).
+ */
+void SnapRealm::split_at(SnapRealm *child)
+{
+ dout(10) << "split_at " << *child
+ << " on " << *child->inode << dendl;
+
+ if (!child->inode->is_dir()) {
+ // it's not a dir.
+ if (child->inode->containing_realm) {
+ // - no open children.
+ // - only need to move this child's inode's caps.
+ child->inode->move_to_realm(child);
+ } else {
+ // no caps, nothing to move/split.
+ dout(20) << " split no-op, no caps to move on file " << *child->inode << dendl;
+ assert(!child->inode->is_any_caps());
+ }
+ return;
+ }
+
+ // it's a dir.
+
+ // split open_children
+ dout(10) << " open_children are " << open_children << dendl;
+ // no increment in the for header: each branch advances p itself
+ for (set<SnapRealm*>::iterator p = open_children.begin();
+ p != open_children.end(); ) {
+ SnapRealm *realm = *p;
+ if (realm != child &&
+ child->inode->is_projected_ancestor_of(realm->inode)) {
+ dout(20) << " child gets child realm " << *realm << " on " << *realm->inode << dendl;
+ realm->parent = child;
+ child->open_children.insert(realm);
+ // post-increment: p moves on before the erased element's iterator dies
+ open_children.erase(p++);
+ } else {
+ dout(20) << " keeping child realm " << *realm << " on " << *realm->inode << dendl;
+ p++;
+ }
+ }
+
+ // split inodes_with_caps
+ elist<CInode*>::iterator p = inodes_with_caps.begin(member_offset(CInode, item_caps));
+ while (!p.end()) {
+ CInode *in = *p;
+ // advance before possibly moving 'in'
+ // NOTE(review): move_to_realm() presumably unlinks 'in' from this list -- confirm
+ ++p;
+
+ // does inode fall within the child realm?
+ bool under_child = false;
+
+ if (in == child->inode) {
+ under_child = true;
+ } else {
+ // walk up through parent dentries looking for child's inode
+ CInode *t = in;
+ while (t->get_parent_dn()) {
+ t = t->get_parent_dn()->get_dir()->get_inode();
+ if (t == child->inode) {
+ under_child = true;
+ break;
+ }
+ // back at the starting inode: stop instead of looping forever
+ if (t == in)
+ break;
+ }
+ }
+ if (under_child) {
+ dout(20) << " child gets " << *in << dendl;
+ in->move_to_realm(child);
+ } else {
+ dout(20) << " keeping " << *in << dendl;
+ }
+ }
+
+}
+
+// Return the cached encoded snap trace, refreshing the cache first if it is
+// stale.  The reference is to cached_snap_trace itself.
+const bufferlist& SnapRealm::get_snap_trace()
+{
+  check_cache();
+
+  const bufferlist& trace = cached_snap_trace;
+  return trace;
+}
+
+/*
+ * Append to 'snapbl' an encoded SnapRealmInfo for this realm, followed
+ * (recursively) by one for each current ancestor realm.
+ *
+ * my_snaps lists this realm's own snapids, newest first.  When there is a
+ * current parent and past parents exist, prior_parent_snaps lists, newest
+ * first, the snapids build_snap_set() yields for [0, newest past_parents
+ * key].  With no parent, h.parent is 0.
+ */
+void SnapRealm::build_snap_trace(bufferlist& snapbl)
+{
+  SnapRealmInfo info(inode->ino(), srnode.created, srnode.seq, srnode.current_parent_since);
+
+  if (!parent) {
+    info.h.parent = 0;
+  } else {
+    info.h.parent = parent->inode->ino();
+    if (!srnode.past_parents.empty()) {
+      snapid_t last = srnode.past_parents.rbegin()->first;
+      set<snapid_t> past;
+      // the maxima are required by build_snap_set() but not used here
+      snapid_t max_seq, max_last_created, max_last_destroyed;
+      build_snap_set(past, max_seq, max_last_created, max_last_destroyed, 0, last);
+
+      info.prior_parent_snaps.reserve(past.size());
+      set<snapid_t>::reverse_iterator rp = past.rbegin();
+      while (rp != past.rend()) {
+        info.prior_parent_snaps.push_back(*rp);
+        ++rp;
+      }
+      dout(10) << "build_snap_trace prior_parent_snaps from [1," << last << "] "
+               << info.prior_parent_snaps << dendl;
+    }
+  }
+
+  info.my_snaps.reserve(srnode.snaps.size());
+  map<snapid_t,SnapInfo>::reverse_iterator rs = srnode.snaps.rbegin();
+  while (rs != srnode.snaps.rend()) {
+    info.my_snaps.push_back(rs->first);
+    ++rs;
+  }
+  dout(10) << "build_snap_trace my_snaps " << info.my_snaps << dendl;
+
+  ::encode(info, snapbl);
+
+  // ancestors follow us in the trace
+  if (parent)
+    parent->build_snap_trace(snapbl);
+}
+
+
+
+/*
+ * Drop every past_parents entry whose interval [link.first, key] contains
+ * no snapid from cached_snaps.  Requires the realm to be open (asserted).
+ */
+void SnapRealm::prune_past_parents()
+{
+  dout(10) << "prune_past_parents" << dendl;
+  check_cache();
+  assert(open);
+
+  for (map<snapid_t, snaplink_t>::iterator p = srnode.past_parents.begin();
+       p != srnode.past_parents.end(); ) {
+    // earliest cached snap at or after the start of this interval
+    set<snapid_t>::iterator q = cached_snaps.lower_bound(p->second.first);
+    bool still_needed = (q != cached_snaps.end() && *q <= p->first);
+
+    if (still_needed) {
+      dout(10) << "prune_past_parents keeping [" << p->second.first << "," << p->first
+               << "] " << p->second.ino << dendl;
+      ++p;
+    } else {
+      dout(10) << "prune_past_parents pruning [" << p->second.first << "," << p->first
+               << "] " << p->second.ino << dendl;
+      // post-increment keeps p valid across the erase
+      srnode.past_parents.erase(p++);
+    }
+  }
+}
+
diff --git a/src/mds/SnapRealm.h b/src/mds/SnapRealm.h
new file mode 100644
index 00000000000..a676b18aa22
--- /dev/null
+++ b/src/mds/SnapRealm.h
@@ -0,0 +1,148 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef CEPH_MDS_SNAPREALM_H
+#define CEPH_MDS_SNAPREALM_H
+
+#include "mdstypes.h"
+#include "snap.h"
+#include "include/xlist.h"
+#include "include/elist.h"
+#include "common/snap_types.h"
+
+/*
+ * In-memory state for one snapshot realm, rooted at 'inode'.
+ *
+ * 'srnode' is the realm state proper; the remaining members track the open
+ * parent/child/past-parent realms, a cache of the snap set derived from
+ * this realm and its past and present parents, and the inodes and client
+ * capabilities attached to the realm.
+ */
+struct SnapRealm {
+  // realm state
+
+  sr_t srnode;
+
+  // in-memory state
+  MDCache *mdcache;
+  CInode *inode;
+
+  bool open;                        // set to true once all past_parents are opened
+  SnapRealm *parent;
+  set<SnapRealm*> open_children;    // active children that are currently open
+  map<inodeno_t,SnapRealm*> open_past_parents;  // these are explicitly pinned.
+
+  // cache
+  snapid_t cached_seq;           // max seq over self and all past+present parents.
+  snapid_t cached_last_created;  // max last_created over all past+present parents
+  snapid_t cached_last_destroyed;
+  set<snapid_t> cached_snaps;
+  SnapContext cached_snap_context;
+
+  bufferlist cached_snap_trace;
+
+  elist<CInode*> inodes_with_caps;             // for efficient realm splits
+  map<client_t, xlist<Capability*>* > client_caps;   // to identify clients who need snap notifications
+
+  SnapRealm(MDCache *c, CInode *in) :
+    srnode(),
+    mdcache(c), inode(in),
+    open(false), parent(0),
+    inodes_with_caps(0)
+  { }
+
+  // True if this realm itself (parents are not consulted) has a snapshot
+  // called 'name'.
+  bool exists(const string &name) {
+    for (map<snapid_t,SnapInfo>::iterator p = srnode.snaps.begin();
+         p != srnode.snaps.end();
+         ++p)
+      if (p->second.name == name)
+        return true;
+    return false;
+  }
+
+  bool _open_parents(Context *retryorfinish, snapid_t first=1, snapid_t last=CEPH_NOSNAP);
+  // Open all parents.  Returns true, and deletes 'retryorfinish', when they
+  // are already open; returns false when _open_parents() could not finish.
+  bool open_parents(Context *retryorfinish) {
+    if (!_open_parents(retryorfinish))
+      return false;
+    delete retryorfinish;
+    return true;
+  }
+  bool have_past_parents_open(snapid_t first=1, snapid_t last=CEPH_NOSNAP);
+  void add_open_past_parent(SnapRealm *parent);
+  void close_parents();
+
+  void prune_past_parents();
+  bool has_past_parents() { return !srnode.past_parents.empty(); }
+
+  void build_snap_set(set<snapid_t>& s,
+                      snapid_t& max_seq, snapid_t& max_last_created, snapid_t& max_last_destroyed,
+                      snapid_t first, snapid_t last);
+  void get_snap_info(map<snapid_t,SnapInfo*>& infomap, snapid_t first=0, snapid_t last=CEPH_NOSNAP);
+
+  const bufferlist& get_snap_trace();
+  void build_snap_trace(bufferlist& snapbl);
+
+  const string& get_snapname(snapid_t snapid, inodeno_t atino);
+  snapid_t resolve_snapname(const string &name, inodeno_t atino, snapid_t first=0, snapid_t last=CEPH_NOSNAP);
+
+  void check_cache();
+  const set<snapid_t>& get_snaps();
+  const SnapContext& get_snap_context();
+  // Reset cached_seq to 0 so check_cache() sees the cache as stale.
+  void invalidate_cached_snaps() {
+    cached_seq = 0;
+  }
+  snapid_t get_last_created() {
+    check_cache();
+    return cached_last_created;
+  }
+  snapid_t get_last_destroyed() {
+    check_cache();
+    return cached_last_destroyed;
+  }
+  // Newest snapid in the cached snap set, or 0 if there are no snaps.
+  snapid_t get_newest_snap() {
+    check_cache();
+    if (cached_snaps.empty())
+      return 0;
+    else
+      return *cached_snaps.rbegin();
+  }
+  snapid_t get_newest_seq() {
+    check_cache();
+    return cached_seq;
+  }
+
+  // Oldest snapid strictly greater than 'follows', or CEPH_NOSNAP if there
+  // is none.
+  snapid_t get_snap_following(snapid_t follows) {
+    // get_snaps() refreshes the cache itself; bind by reference so the
+    // whole set is not copied on every call.
+    const set<snapid_t>& s = get_snaps();
+    set<snapid_t>::const_iterator p = s.upper_bound(follows);
+    if (p != s.end())
+      return *p;
+    return CEPH_NOSNAP;
+  }
+
+  void adjust_parent();
+
+  void split_at(SnapRealm *child);
+  void join(SnapRealm *child);
+
+  // Link 'cap' into this realm's per-client capability list, creating the
+  // list on first use.
+  void add_cap(client_t client, Capability *cap) {
+    // operator[] value-initializes a missing entry to a null pointer
+    xlist<Capability*>*& caps = client_caps[client];
+    if (!caps)
+      caps = new xlist<Capability*>;
+    caps->push_back(&cap->item_snaprealm_caps);
+  }
+  // Unlink 'cap' from its list and free the client's list once it is
+  // empty.  A client with no list is tolerated; the old client_caps[client]
+  // lookup would have inserted and dereferenced a null pointer.
+  void remove_cap(client_t client, Capability *cap) {
+    cap->item_snaprealm_caps.remove_myself();
+    map<client_t, xlist<Capability*>* >::iterator p = client_caps.find(client);
+    if (p == client_caps.end())
+      return;
+    if (p->second->empty()) {
+      delete p->second;
+      client_caps.erase(p);
+    }
+  }
+
+};
+
+ostream& operator<<(ostream& out, const SnapRealm &realm);
+
+#endif
diff --git a/src/mds/SnapServer.cc b/src/mds/SnapServer.cc
index a39395c6bd6..57e7e62c2e5 100644
--- a/src/mds/SnapServer.cc
+++ b/src/mds/SnapServer.cc
@@ -242,7 +242,7 @@ void SnapServer::check_osd_map(bool force)
}
}
- if (all_purged.size()) {
+ if (!all_purged.empty()) {
// prepare to remove from need_to_purge list
bufferlist bl;
::encode(all_purged, bl);
diff --git a/src/mds/SnapServer.h b/src/mds/SnapServer.h
index cf8ea6a50b5..79e58e020de 100644
--- a/src/mds/SnapServer.h
+++ b/src/mds/SnapServer.h
@@ -40,23 +40,22 @@ public:
void reset_state();
void encode_server_state(bufferlist& bl) {
- __u8 v = 2;
- ::encode(v, bl);
+ ENCODE_START(3, 3, bl);
::encode(last_snap, bl);
::encode(snaps, bl);
::encode(need_to_purge, bl);
::encode(pending_create, bl);
::encode(pending_destroy, bl);
::encode(pending_noop, bl);
+ ENCODE_FINISH(bl);
}
void decode_server_state(bufferlist::iterator& bl) {
- __u8 v;
- ::decode(v, bl);
+ DECODE_START_LEGACY_COMPAT_LEN(3, 3, 3, bl);
::decode(last_snap, bl);
::decode(snaps, bl);
::decode(need_to_purge, bl);
::decode(pending_create, bl);
- if (v >= 2)
+ if (struct_v >= 2)
::decode(pending_destroy, bl);
else {
map<version_t, snapid_t> t;
@@ -65,6 +64,7 @@ public:
pending_destroy[p->first].first = p->second;
}
::decode(pending_noop, bl);
+ DECODE_FINISH(bl);
}
// server bits
diff --git a/src/mds/events/ECommitted.h b/src/mds/events/ECommitted.h
index dfc84a515e7..2889a3b032d 100644
--- a/src/mds/events/ECommitted.h
+++ b/src/mds/events/ECommitted.h
@@ -26,23 +26,14 @@ public:
ECommitted(metareqid_t r) :
LogEvent(EVENT_COMMITTED), reqid(r) { }
- void print(ostream& out) {
+ void print(ostream& out) const {
out << "ECommitted " << reqid;
}
- void encode(bufferlist &bl) const {
- __u8 struct_v = 2;
- ::encode(struct_v, bl);
- ::encode(stamp, bl);
- ::encode(reqid, bl);
- }
- void decode(bufferlist::iterator &bl) {
- __u8 struct_v;
- ::decode(struct_v, bl);
- if (struct_v >= 2)
- ::decode(stamp, bl);
- ::decode(reqid, bl);
- }
+ void encode(bufferlist &bl) const;
+ void decode(bufferlist::iterator &bl);
+ void dump(Formatter *f) const;
+ static void generate_test_instances(list<ECommitted*>& ls);
void update_segment() {}
void replay(MDS *mds);
diff --git a/src/mds/events/EExport.h b/src/mds/events/EExport.h
index 3313d17f038..082e14babb8 100644
--- a/src/mds/events/EExport.h
+++ b/src/mds/events/EExport.h
@@ -21,6 +21,7 @@
#include "../MDS.h"
#include "EMetaBlob.h"
+#include "../LogEvent.h"
class EExport : public LogEvent {
public:
@@ -37,28 +38,14 @@ public:
set<dirfrag_t> &get_bounds() { return bounds; }
- void print(ostream& out) {
+ void print(ostream& out) const {
out << "EExport " << base << " " << metablob;
}
- void encode(bufferlist& bl) const {
- __u8 struct_v = 2;
- ::encode(struct_v, bl);
- ::encode(stamp, bl);
- ::encode(metablob, bl);
- ::encode(base, bl);
- ::encode(bounds, bl);
- }
- void decode(bufferlist::iterator &bl) {
- __u8 struct_v;
- ::decode(struct_v, bl);
- if (struct_v >= 2)
- ::decode(stamp, bl);
- ::decode(metablob, bl);
- ::decode(base, bl);
- ::decode(bounds, bl);
- }
-
+ void encode(bufferlist& bl) const;
+ void decode(bufferlist::iterator &bl);
+ void dump(Formatter *f) const;
+ static void generate_test_instances(list<EExport*>& ls);
void replay(MDS *mds);
};
diff --git a/src/mds/events/EFragment.h b/src/mds/events/EFragment.h
index 3c9a93b549d..bdbbd335e29 100644
--- a/src/mds/events/EFragment.h
+++ b/src/mds/events/EFragment.h
@@ -30,7 +30,8 @@ public:
EFragment(MDLog *mdlog, int o, inodeno_t i, frag_t bf, int b) :
LogEvent(EVENT_FRAGMENT), metablob(mdlog),
op(o), ino(i), basefrag(bf), bits(b) { }
- void print(ostream& out) {
+
+ void print(ostream& out) const {
out << "EFragment " << op_name(op) << " " << ino << " " << basefrag << " by " << bits << " " << metablob;
}
@@ -40,7 +41,7 @@ public:
OP_ROLLBACK = 3,
OP_ONESHOT = 4, // (legacy) PREPARE+COMMIT
};
- const char *op_name(int o) {
+ const char *op_name(int o) const {
switch (o) {
case OP_PREPARE: return "prepare";
case OP_COMMIT: return "commit";
@@ -49,31 +50,10 @@ public:
}
}
- void encode(bufferlist &bl) const {
- __u8 struct_v = 3;
- ::encode(struct_v, bl);
- ::encode(stamp, bl);
- ::encode(op, bl);
- ::encode(ino, bl);
- ::encode(basefrag, bl);
- ::encode(bits, bl);
- ::encode(metablob, bl);
- }
- void decode(bufferlist::iterator &bl) {
- __u8 struct_v;
- ::decode(struct_v, bl);
- if (struct_v >= 2)
- ::decode(stamp, bl);
- if (struct_v >= 3)
- ::decode(op, bl);
- else
- op = OP_ONESHOT;
- ::decode(ino, bl);
- ::decode(basefrag, bl);
- ::decode(bits, bl);
- ::decode(metablob, bl);
- }
-
+ void encode(bufferlist &bl) const;
+ void decode(bufferlist::iterator &bl);
+ void dump(Formatter *f) const;
+ static void generate_test_instances(list<EFragment*>& ls);
void replay(MDS *mds);
};
diff --git a/src/mds/events/EImportFinish.h b/src/mds/events/EImportFinish.h
index f8c8b39838d..7ed25e15bef 100644
--- a/src/mds/events/EImportFinish.h
+++ b/src/mds/events/EImportFinish.h
@@ -19,6 +19,7 @@
#include "include/types.h"
#include "../MDS.h"
+#include "../LogEvent.h"
class EImportFinish : public LogEvent {
protected:
@@ -29,9 +30,9 @@ class EImportFinish : public LogEvent {
EImportFinish(CDir *dir, bool s) : LogEvent(EVENT_IMPORTFINISH),
base(dir->dirfrag()),
success(s) { }
- EImportFinish() : LogEvent(EVENT_IMPORTFINISH) { }
+ EImportFinish() : LogEvent(EVENT_IMPORTFINISH), base(), success(false) { }
- void print(ostream& out) {
+ void print(ostream& out) const {
out << "EImportFinish " << base;
if (success)
out << " success";
@@ -39,21 +40,10 @@ class EImportFinish : public LogEvent {
out << " failed";
}
- void encode(bufferlist& bl) const {
- __u8 struct_v = 2;
- ::encode(struct_v, bl);
- ::encode(stamp, bl);
- ::encode(base, bl);
- ::encode(success, bl);
- }
- void decode(bufferlist::iterator &bl) {
- __u8 struct_v;
- ::decode(struct_v, bl);
- if (struct_v >= 2)
- ::decode(stamp, bl);
- ::decode(base, bl);
- ::decode(success, bl);
- }
+ void encode(bufferlist& bl) const;
+ void decode(bufferlist::iterator &bl);
+ void dump(Formatter *f) const;
+ static void generate_test_instances(list<EImportFinish*>& ls);
void replay(MDS *mds);
diff --git a/src/mds/events/EImportStart.h b/src/mds/events/EImportStart.h
index 0d5f275e0ec..0f55190139c 100644
--- a/src/mds/events/EImportStart.h
+++ b/src/mds/events/EImportStart.h
@@ -21,6 +21,7 @@
#include "../MDS.h"
#include "EMetaBlob.h"
+#include "../LogEvent.h"
class EImportStart : public LogEvent {
protected:
@@ -39,31 +40,14 @@ protected:
metablob(log) { }
EImportStart() : LogEvent(EVENT_IMPORTSTART) { }
- void print(ostream& out) {
+ void print(ostream& out) const {
out << "EImportStart " << base << " " << metablob;
}
- void encode(bufferlist &bl) const {
- __u8 struct_v = 2;
- ::encode(struct_v, bl);
- ::encode(stamp, bl);
- ::encode(base, bl);
- ::encode(metablob, bl);
- ::encode(bounds, bl);
- ::encode(cmapv, bl);
- ::encode(client_map, bl);
- }
- void decode(bufferlist::iterator &bl) {
- __u8 struct_v;
- ::decode(struct_v, bl);
- if (struct_v >= 2)
- ::decode(stamp, bl);
- ::decode(base, bl);
- ::decode(metablob, bl);
- ::decode(bounds, bl);
- ::decode(cmapv, bl);
- ::decode(client_map, bl);
- }
+ void encode(bufferlist &bl) const;
+ void decode(bufferlist::iterator &bl);
+ void dump(Formatter *f) const;
+ static void generate_test_instances(list<EImportStart*>& ls);
void update_segment();
void replay(MDS *mds);
diff --git a/src/mds/events/EMetaBlob.h b/src/mds/events/EMetaBlob.h
index bd0a8f7b4db..d1baefe9402 100644
--- a/src/mds/events/EMetaBlob.h
+++ b/src/mds/events/EMetaBlob.h
@@ -68,7 +68,7 @@ public:
string symlink;
bufferlist snapbl;
bool dirty;
- struct default_file_layout *dir_layout;
+ struct file_layout_policy_t *dir_layout;
typedef map<snapid_t, old_inode_t> old_inodes_t;
old_inodes_t old_inodes;
@@ -78,10 +78,10 @@ public:
const fullbit& operator=(const fullbit& o);
fullbit(const string& d, snapid_t df, snapid_t dl,
- version_t v, inode_t& i, fragtree_t &dft,
- map<string,bufferptr> &xa, const string& sym,
- bufferlist &sbl, bool dr, default_file_layout *defl = NULL,
- old_inodes_t *oi = NULL) :
+ version_t v, const inode_t& i, const fragtree_t &dft,
+ const map<string,bufferptr> &xa, const string& sym,
+ const bufferlist &sbl, bool dr, const file_layout_policy_t *defl = NULL,
+ const old_inodes_t *oi = NULL) :
//dn(d), dnfirst(df), dnlast(dl), dnv(v),
//inode(i), dirfragtree(dft), xattrs(xa), symlink(sym), snapbl(sbl), dirty(dr)
dir_layout(NULL), _enc(1024)
@@ -114,48 +114,13 @@ public:
delete dir_layout;
}
- void encode(bufferlist& bl) const {
- __u8 struct_v = 3;
- ::encode(struct_v, bl);
- assert(_enc.length());
- bl.append(_enc);
- }
- void decode(bufferlist::iterator &bl) {
- __u8 struct_v;
- ::decode(struct_v, bl);
- ::decode(dn, bl);
- ::decode(dnfirst, bl);
- ::decode(dnlast, bl);
- ::decode(dnv, bl);
- ::decode(inode, bl);
- ::decode(xattrs, bl);
- if (inode.is_symlink())
- ::decode(symlink, bl);
- if (inode.is_dir()) {
- ::decode(dirfragtree, bl);
- ::decode(snapbl, bl);
- if (struct_v >= 2) {
- bool dir_layout_exists;
- ::decode(dir_layout_exists, bl);
- if (dir_layout_exists) {
- dir_layout = new default_file_layout;
- ::decode(*dir_layout, bl);
- }
- }
- }
- ::decode(dirty, bl);
- if (struct_v >= 3) {
- bool old_inodes_present;
- ::decode(old_inodes_present, bl);
- if (old_inodes_present) {
- ::decode(old_inodes, bl);
- }
- }
- }
-
+ void encode(bufferlist& bl) const;
+ void decode(bufferlist::iterator &bl);
+ void dump(Formatter *f) const;
+ static void generate_test_instances(list<EMetaBlob::fullbit*>& ls);
void update_inode(MDS *mds, CInode *in);
- void print(ostream& out) {
+ void print(ostream& out) const {
out << " fullbit dn " << dn << " [" << dnfirst << "," << dnlast << "] dnv " << dnv
<< " inode " << inode.ino
<< " dirty=" << dirty << std::endl;
@@ -187,39 +152,18 @@ public:
::encode(dr, _enc);
}
remotebit(bufferlist::iterator &p) { decode(p); }
- remotebit() {}
-
- void encode(bufferlist& bl) const {
- __u8 struct_v = 1;
- ::encode(struct_v, bl);
- assert(_enc.length());
- bl.append(_enc);
- /*
- ::encode(dn, bl);
- ::encode(dnfirst, bl);
- ::encode(dnlast, bl);
- ::encode(dnv, bl);
- ::encode(ino, bl);
- ::encode(d_type, bl);
- ::encode(dirty, bl);
- */
- }
- void decode(bufferlist::iterator &bl) {
- __u8 struct_v;
- ::decode(struct_v, bl);
- ::decode(dn, bl);
- ::decode(dnfirst, bl);
- ::decode(dnlast, bl);
- ::decode(dnv, bl);
- ::decode(ino, bl);
- ::decode(d_type, bl);
- ::decode(dirty, bl);
- }
- void print(ostream& out) {
+ remotebit(): dnfirst(0), dnlast(0), dnv(0), ino(0),
+ d_type('\0'), dirty(false) {}
+
+ void encode(bufferlist& bl) const;
+ void decode(bufferlist::iterator &bl);
+ void print(ostream& out) const {
out << " remotebit dn " << dn << " [" << dnfirst << "," << dnlast << "] dnv " << dnv
<< " ino " << ino
<< " dirty=" << dirty << std::endl;
}
+ void dump(Formatter *f) const;
+ static void generate_test_instances(list<remotebit*>& ls);
};
WRITE_CLASS_ENCODER(remotebit)
@@ -244,30 +188,12 @@ public:
::encode(dr, _enc);
}
nullbit(bufferlist::iterator &p) { decode(p); }
- nullbit() {}
-
- void encode(bufferlist& bl) const {
- __u8 struct_v = 1;
- ::encode(struct_v, bl);
- assert(_enc.length());
- bl.append(_enc);
- /*
- ::encode(dn, bl);
- ::encode(dnfirst, bl);
- ::encode(dnlast, bl);
- ::encode(dnv, bl);
- ::encode(dirty, bl);
- */
- }
- void decode(bufferlist::iterator &bl) {
- __u8 struct_v;
- ::decode(struct_v, bl);
- ::decode(dn, bl);
- ::decode(dnfirst, bl);
- ::decode(dnlast, bl);
- ::decode(dnv, bl);
- ::decode(dirty, bl);
- }
+ nullbit(): dnfirst(0), dnlast(0), dnv(0), dirty(false) {}
+
+ void encode(bufferlist& bl) const;
+ void decode(bufferlist::iterator &bl);
+ void dump(Formatter *f) const;
+ static void generate_test_instances(list<nullbit*>& ls);
void print(ostream& out) {
out << " nullbit dn " << dn << " [" << dnfirst << "," << dnlast << "] dnv " << dnv
<< " dirty=" << dirty << std::endl;
@@ -300,11 +226,11 @@ public:
public:
dirlump() : state(0), nfull(0), nremote(0), nnull(0), dn_decoded(true) { }
- bool is_complete() { return state & STATE_COMPLETE; }
+ bool is_complete() const { return state & STATE_COMPLETE; }
void mark_complete() { state |= STATE_COMPLETE; }
- bool is_dirty() { return state & STATE_DIRTY; }
+ bool is_dirty() const { return state & STATE_DIRTY; }
void mark_dirty() { state |= STATE_DIRTY; }
- bool is_new() { return state & STATE_NEW; }
+ bool is_new() const { return state & STATE_NEW; }
void mark_new() { state |= STATE_NEW; }
bool is_importing() { return state & STATE_IMPORTING; }
void mark_importing() { state |= STATE_IMPORTING; }
@@ -327,7 +253,26 @@ public:
p->print(out);
}
+ // Render the complete/dirty/new state flags as a '+'-joined string, e.g.
+ // "complete+dirty"; the result is empty when none of them is set.
+ string state_string() const {
+   string out;
+   if (is_complete())
+     out += "complete";
+   if (is_dirty()) {
+     if (!out.empty())
+       out += "+";
+     out += "dirty";
+   }
+   if (is_new()) {
+     if (!out.empty())
+       out += "+";
+     out += "new";
+   }
+   return out;
+ }
+
+ // if this changes, update the versioning in encode for it!
void _encode_bits() const {
+ if (!dn_decoded) return;
::encode(dfull, dnbl);
::encode(dremote, dnbl);
::encode(dnull, dnbl);
@@ -341,28 +286,10 @@ public:
dn_decoded = true;
}
- void encode(bufferlist& bl) const {
- __u8 struct_v = 1;
- ::encode(struct_v, bl);
- ::encode(fnode, bl);
- ::encode(state, bl);
- ::encode(nfull, bl);
- ::encode(nremote, bl);
- ::encode(nnull, bl);
- _encode_bits();
- ::encode(dnbl, bl);
- }
- void decode(bufferlist::iterator &bl) {
- __u8 struct_v;
- ::decode(struct_v, bl);
- ::decode(fnode, bl);
- ::decode(state, bl);
- ::decode(nfull, bl);
- ::decode(nremote, bl);
- ::decode(nnull, bl);
- ::decode(dnbl, bl);
- dn_decoded = false; // don't decode bits unless we need them.
- }
+ void encode(bufferlist& bl) const;
+ void decode(bufferlist::iterator &bl);
+ void dump(Formatter *f) const;
+ static void generate_test_instances(list<dirlump*>& ls);
};
WRITE_CLASS_ENCODER(dirlump)
@@ -397,70 +324,10 @@ private:
list<pair<metareqid_t,uint64_t> > client_reqs;
public:
- void encode(bufferlist& bl) const {
- __u8 struct_v = 4;
- ::encode(struct_v, bl);
- ::encode(lump_order, bl);
- ::encode(lump_map, bl);
- ::encode(roots, bl);
- ::encode(table_tids, bl);
- ::encode(opened_ino, bl);
- ::encode(allocated_ino, bl);
- ::encode(used_preallocated_ino, bl);
- ::encode(preallocated_inos, bl);
- ::encode(client_name, bl);
- ::encode(inotablev, bl);
- ::encode(sessionmapv, bl);
- ::encode(truncate_start, bl);
- ::encode(truncate_finish, bl);
- ::encode(destroyed_inodes, bl);
- ::encode(client_reqs, bl);
- ::encode(renamed_dirino, bl);
- ::encode(renamed_dir_frags, bl);
- }
- void decode(bufferlist::iterator &bl) {
- __u8 struct_v;
- ::decode(struct_v, bl);
- ::decode(lump_order, bl);
- ::decode(lump_map, bl);
- if (struct_v >= 4) {
- ::decode(roots, bl);
- } else {
- bufferlist rootbl;
- ::decode(rootbl, bl);
- if (rootbl.length()) {
- bufferlist::iterator p = rootbl.begin();
- roots.push_back(std::tr1::shared_ptr<fullbit>(new fullbit(p)));
- }
- }
- ::decode(table_tids, bl);
- ::decode(opened_ino, bl);
- ::decode(allocated_ino, bl);
- ::decode(used_preallocated_ino, bl);
- ::decode(preallocated_inos, bl);
- ::decode(client_name, bl);
- ::decode(inotablev, bl);
- ::decode(sessionmapv, bl);
- ::decode(truncate_start, bl);
- ::decode(truncate_finish, bl);
- ::decode(destroyed_inodes, bl);
- if (struct_v >= 2) {
- ::decode(client_reqs, bl);
- } else {
- list<metareqid_t> r;
- ::decode(r, bl);
- while (!r.empty()) {
- client_reqs.push_back(pair<metareqid_t,uint64_t>(r.front(), 0));
- r.pop_front();
- }
- }
- if (struct_v >= 3) {
- ::decode(renamed_dirino, bl);
- ::decode(renamed_dir_frags, bl);
- }
- }
-
-
+ void encode(bufferlist& bl) const;
+ void decode(bufferlist::iterator& bl);
+ void dump(Formatter *f) const;
+ static void generate_test_instances(list<EMetaBlob*>& ls);
// soft stateadd
uint64_t last_subtree_map;
uint64_t my_offset;
@@ -562,7 +429,7 @@ private:
//cout << "journaling " << in->inode.ino << " at " << my_offset << std::endl;
inode_t *pi = in->get_projected_inode();
- default_file_layout *default_layout = NULL;
+ file_layout_policy_t *default_layout = NULL;
if (in->is_dir())
default_layout = (in->get_projected_node() ?
in->get_projected_node()->dir_layout :
@@ -611,7 +478,7 @@ private:
if (!pdft) pdft = &in->dirfragtree;
if (!px) px = &in->xattrs;
- default_file_layout *default_layout = NULL;
+ file_layout_policy_t *default_layout = NULL;
if (in->is_dir())
default_layout = (in->get_projected_node() ?
in->get_projected_node()->dir_layout :
diff --git a/src/mds/events/EOpen.h b/src/mds/events/EOpen.h
index 1919b073827..792540ef5da 100644
--- a/src/mds/events/EOpen.h
+++ b/src/mds/events/EOpen.h
@@ -27,7 +27,7 @@ public:
EOpen(MDLog *mdlog) :
LogEvent(EVENT_OPEN), metablob(mdlog) { }
- void print(ostream& out) {
+ void print(ostream& out) const {
out << "EOpen " << metablob << ", " << inos.size() << " open files";
}
@@ -42,21 +42,10 @@ public:
inos.push_back(ino);
}
- void encode(bufferlist &bl) const {
- __u8 struct_v = 2;
- ::encode(struct_v, bl);
- ::encode(stamp, bl);
- ::encode(metablob, bl);
- ::encode(inos, bl);
- }
- void decode(bufferlist::iterator &bl) {
- __u8 struct_v;
- ::decode(struct_v, bl);
- if (struct_v >= 2)
- ::decode(stamp, bl);
- ::decode(metablob, bl);
- ::decode(inos, bl);
- }
+ void encode(bufferlist& bl) const;
+ void decode(bufferlist::iterator& bl);
+ void dump(Formatter *f) const;
+ static void generate_test_instances(list<EOpen*>& ls);
void update_segment();
void replay(MDS *mds);
diff --git a/src/mds/events/EResetJournal.h b/src/mds/events/EResetJournal.h
index 4f5bab8c285..c782f29a8dd 100644
--- a/src/mds/events/EResetJournal.h
+++ b/src/mds/events/EResetJournal.h
@@ -24,18 +24,11 @@ class EResetJournal : public LogEvent {
EResetJournal() : LogEvent(EVENT_RESETJOURNAL) { }
~EResetJournal() {}
- void encode(bufferlist& bl) const {
- __u8 v = 1;
- ::encode(v, bl);
- ::encode(stamp, bl);
- }
- void decode(bufferlist::iterator &bl) {
- __u8 v;
- ::decode(v, bl);
- ::decode(stamp, bl);
- }
-
- void print(ostream& out) {
+ void encode(bufferlist& bl) const;
+ void decode(bufferlist::iterator& bl);
+ void dump(Formatter *f) const;
+ static void generate_test_instances(list<EResetJournal*>& ls);
+ void print(ostream& out) const {
out << "EResetJournal";
}
diff --git a/src/mds/events/ESession.h b/src/mds/events/ESession.h
index 1580d1234f9..91ad6c45ae4 100644
--- a/src/mds/events/ESession.h
+++ b/src/mds/events/ESession.h
@@ -46,30 +46,12 @@ class ESession : public LogEvent {
cmapv(v),
inos(i), inotablev(iv) { }
- void encode(bufferlist &bl) const {
- __u8 struct_v = 2;
- ::encode(struct_v, bl);
- ::encode(stamp, bl);
- ::encode(client_inst, bl);
- ::encode(open, bl);
- ::encode(cmapv, bl);
- ::encode(inos, bl);
- ::encode(inotablev, bl);
- }
- void decode(bufferlist::iterator &bl) {
- __u8 struct_v;
- ::decode(struct_v, bl);
- if (struct_v >= 2)
- ::decode(stamp, bl);
- ::decode(client_inst, bl);
- ::decode(open, bl);
- ::decode(cmapv, bl);
- ::decode(inos, bl);
- ::decode(inotablev, bl);
- }
-
+ void encode(bufferlist& bl) const;
+ void decode(bufferlist::iterator& bl);
+ void dump(Formatter *f) const;
+ static void generate_test_instances(list<ESession*>& ls);
- void print(ostream& out) {
+ void print(ostream& out) const {
if (open)
out << "ESession " << client_inst << " open cmapv " << cmapv;
else
diff --git a/src/mds/events/ESessions.h b/src/mds/events/ESessions.h
index 9b090cee394..fe943a881fd 100644
--- a/src/mds/events/ESessions.h
+++ b/src/mds/events/ESessions.h
@@ -26,28 +26,29 @@ protected:
public:
map<client_t,entity_inst_t> client_map;
+ bool old_style_encode;
- ESessions() : LogEvent(EVENT_SESSIONS) { }
+ ESessions() : LogEvent(EVENT_SESSIONS), old_style_encode(false) { }
ESessions(version_t pv, map<client_t,entity_inst_t>& cm) :
LogEvent(EVENT_SESSIONS),
- cmapv(pv) {
+ cmapv(pv),
+ old_style_encode(false) {
client_map.swap(cm);
}
-
- void encode(bufferlist &bl) const {
- ::encode(client_map, bl);
- ::encode(cmapv, bl);
- ::encode(stamp, bl);
- }
+
+ void mark_old_encoding() { old_style_encode = true; }
+
+ void encode(bufferlist &bl) const;
+ void decode_old(bufferlist::iterator &bl);
+ void decode_new(bufferlist::iterator &bl);
void decode(bufferlist::iterator &bl) {
- ::decode(client_map, bl);
- ::decode(cmapv, bl);
- if (!bl.end())
- ::decode(stamp, bl);
+ if (old_style_encode) decode_old(bl);
+ else decode_new(bl);
}
+ void dump(Formatter *f) const;
+ static void generate_test_instances(list<ESessions*>& ls);
-
- void print(ostream& out) {
+ void print(ostream& out) const {
out << "ESessions " << client_map.size() << " opens cmapv " << cmapv;
}
diff --git a/src/mds/events/ESlaveUpdate.h b/src/mds/events/ESlaveUpdate.h
index 452c85ae3cb..40d9c22f6d4 100644
--- a/src/mds/events/ESlaveUpdate.h
+++ b/src/mds/events/ESlaveUpdate.h
@@ -31,26 +31,12 @@ struct link_rollback {
utime_t old_dir_mtime;
utime_t old_dir_rctime;
- void encode(bufferlist &bl) const {
- __u8 struct_v = 1;
- ::encode(struct_v, bl);
- ::encode(reqid, bl);
- ::encode(ino, bl);
- ::encode(was_inc, bl);
- ::encode(old_ctime, bl);
- ::encode(old_dir_mtime, bl);
- ::encode(old_dir_rctime, bl);
- }
- void decode(bufferlist::iterator &bl) {
- __u8 struct_v;
- ::decode(struct_v, bl);
- ::decode(reqid, bl);
- ::decode(ino, bl);
- ::decode(was_inc, bl);
- ::decode(old_ctime, bl);
- ::decode(old_dir_mtime, bl);
- ::decode(old_dir_rctime, bl);
- }
+ link_rollback() : ino(0), was_inc(false) {}
+
+ void encode(bufferlist& bl) const;
+ void decode(bufferlist::iterator& bl);
+ void dump(Formatter *f) const;
+ static void generate_test_instances(list<link_rollback*>& ls);
};
WRITE_CLASS_ENCODER(link_rollback)
@@ -67,24 +53,10 @@ struct rmdir_rollback {
dirfrag_t dest_dir;
string dest_dname;
- void encode(bufferlist& bl) const {
- __u8 struct_v = 1;
- ::encode(struct_v, bl);
- ::encode(reqid, bl);
- ::encode(src_dir, bl);
- ::encode(src_dname, bl);
- ::encode(dest_dir, bl);
- ::encode(dest_dname, bl);
- }
- void decode(bufferlist::iterator& bl) {
- __u8 struct_v;
- ::decode(struct_v, bl);
- ::decode(reqid, bl);
- ::decode(src_dir, bl);
- ::decode(src_dname, bl);
- ::decode(dest_dir, bl);
- ::decode(dest_dname, bl);
- }
+ void encode(bufferlist& bl) const;
+ void decode(bufferlist::iterator& bl);
+ void dump(Formatter *f) const;
+ static void generate_test_instances(list<rmdir_rollback*>& ls);
};
WRITE_CLASS_ENCODER(rmdir_rollback)
@@ -98,30 +70,10 @@ struct rename_rollback {
char remote_d_type;
utime_t old_ctime;
- void encode(bufferlist &bl) const {
- __u8 struct_v = 1;
- ::encode(struct_v, bl);
- ::encode(dirfrag, bl);
- ::encode(dirfrag_old_mtime, bl);
- ::encode(dirfrag_old_rctime, bl);
- ::encode(ino, bl);
- ::encode(remote_ino, bl);
- ::encode(dname, bl);
- ::encode(remote_d_type, bl);
- ::encode(old_ctime, bl);
- }
- void decode(bufferlist::iterator &bl) {
- __u8 struct_v;
- ::decode(struct_v, bl);
- ::decode(dirfrag, bl);
- ::decode(dirfrag_old_mtime, bl);
- ::decode(dirfrag_old_rctime, bl);
- ::decode(ino, bl);
- ::decode(remote_ino, bl);
- ::decode(dname, bl);
- ::decode(remote_d_type, bl);
- ::decode(old_ctime, bl);
- }
+ void encode(bufferlist& bl) const;
+ void decode(bufferlist::iterator& bl);
+ void dump(Formatter *f) const;
+ static void generate_test_instances(list<drec*>& ls);
};
WRITE_CLASS_MEMBER_ENCODER(drec)
@@ -130,24 +82,10 @@ struct rename_rollback {
drec stray; // we know this is null, but we want dname, old mtime/rctime
utime_t ctime;
- void encode(bufferlist &bl) const {
- __u8 struct_v = 1;
- ::encode(struct_v, bl);
- ::encode(reqid, bl);
- encode(orig_src, bl);
- encode(orig_dest, bl);
- encode(stray, bl);
- ::encode(ctime, bl);
- }
- void decode(bufferlist::iterator &bl) {
- __u8 struct_v;
- ::decode(struct_v, bl);
- ::decode(reqid, bl);
- decode(orig_src, bl);
- decode(orig_dest, bl);
- decode(stray, bl);
- ::decode(ctime, bl);
- }
+ void encode(bufferlist& bl) const;
+ void decode(bufferlist::iterator& bl);
+ void dump(Formatter *f) const;
+ static void generate_test_instances(list<rename_rollback*>& ls);
};
WRITE_CLASS_ENCODER(rename_rollback)
@@ -177,7 +115,7 @@ public:
__u8 op; // prepare, commit, abort
__u8 origop; // link | rename
- ESlaveUpdate() : LogEvent(EVENT_SLAVEUPDATE) { }
+ ESlaveUpdate() : LogEvent(EVENT_SLAVEUPDATE), master(0), op(0), origop(0) { }
ESlaveUpdate(MDLog *mdlog, const char *s, metareqid_t ri, int mastermds, int o, int oo) :
LogEvent(EVENT_SLAVEUPDATE), commit(mdlog),
type(s),
@@ -185,7 +123,7 @@ public:
master(mastermds),
op(o), origop(oo) { }
- void print(ostream& out) {
+ void print(ostream& out) const {
if (type.length())
out << type << " ";
out << " " << (int)op;
@@ -196,31 +134,10 @@ public:
out << commit;
}
- void encode(bufferlist &bl) const {
- __u8 struct_v = 2;
- ::encode(struct_v, bl);
- ::encode(stamp, bl);
- ::encode(type, bl);
- ::encode(reqid, bl);
- ::encode(master, bl);
- ::encode(op, bl);
- ::encode(origop, bl);
- ::encode(commit, bl);
- ::encode(rollback, bl);
- }
- void decode(bufferlist::iterator &bl) {
- __u8 struct_v;
- ::decode(struct_v, bl);
- if (struct_v >= 2)
- ::decode(stamp, bl);
- ::decode(type, bl);
- ::decode(reqid, bl);
- ::decode(master, bl);
- ::decode(op, bl);
- ::decode(origop, bl);
- ::decode(commit, bl);
- ::decode(rollback, bl);
- }
+ void encode(bufferlist& bl) const;
+ void decode(bufferlist::iterator& bl);
+ void dump(Formatter *f) const;
+ static void generate_test_instances(list<ESlaveUpdate*>& ls);
void replay(MDS *mds);
};
diff --git a/src/mds/events/EString.h b/src/mds/events/EString.h
deleted file mode 100644
index aa50514185a..00000000000
--- a/src/mds/events/EString.h
+++ /dev/null
@@ -1,57 +0,0 @@
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab
-/*
- * Ceph - scalable distributed file system
- *
- * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
- *
- * This is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License version 2.1, as published by the Free Software
- * Foundation. See file COPYING.
- *
- */
-
-
-#ifndef CEPH_ESTRING_H
-#define CEPH_ESTRING_H
-
-#include <stdlib.h>
-#include <string>
-using namespace std;
-
-#include "../LogEvent.h"
-
-// generic log event
-class EString : public LogEvent {
- protected:
- string event;
-
- public:
- EString(string e) :
- LogEvent(EVENT_STRING) {
- event = e;
- }
- EString() :
- LogEvent(EVENT_STRING) {
- }
-
- void encode(bufferlist& bl) const {
- ::encode(event, bl);
- ::encode(stamp, bl);
- }
- void decode(bufferlist::iterator &bl) {
- ::decode(event, bl);
- if (!bl.end())
- ::decode(stamp, bl);
- }
-
- void print(ostream& out) {
- out << '"' << event << '"';
- }
-
- void replay(MDS *mds);
-
-};
-
-#endif
diff --git a/src/mds/events/ESubtreeMap.h b/src/mds/events/ESubtreeMap.h
index 0230de1a59e..32a4abe5180 100644
--- a/src/mds/events/ESubtreeMap.h
+++ b/src/mds/events/ESubtreeMap.h
@@ -27,33 +27,16 @@ public:
ESubtreeMap() : LogEvent(EVENT_SUBTREEMAP), expire_pos(0) { }
- void print(ostream& out) {
+ void print(ostream& out) const {
out << "ESubtreeMap " << subtrees.size() << " subtrees "
<< ", " << ambiguous_subtrees.size() << " ambiguous "
<< metablob;
}
- void encode(bufferlist& bl) const {
- __u8 struct_v = 4;
- ::encode(struct_v, bl);
- ::encode(stamp, bl);
- ::encode(metablob, bl);
- ::encode(subtrees, bl);
- ::encode(ambiguous_subtrees, bl);
- ::encode(expire_pos, bl);
- }
- void decode(bufferlist::iterator &bl) {
- __u8 struct_v;
- ::decode(struct_v, bl);
- if (struct_v >= 2)
- ::decode(stamp, bl);
- ::decode(metablob, bl);
- ::decode(subtrees, bl);
- if (struct_v >= 4)
- ::decode(ambiguous_subtrees, bl);
- if (struct_v >= 3)
- ::decode(expire_pos, bl);
- }
+ void encode(bufferlist& bl) const;
+ void decode(bufferlist::iterator& bl);
+ void dump(Formatter *f) const;
+ static void generate_test_instances(list<ESubtreeMap*>& ls);
void replay(MDS *mds);
};
diff --git a/src/mds/events/ETableClient.h b/src/mds/events/ETableClient.h
index ba570fb36ac..e415e60bd85 100644
--- a/src/mds/events/ETableClient.h
+++ b/src/mds/events/ETableClient.h
@@ -26,30 +26,17 @@ struct ETableClient : public LogEvent {
__s16 op;
version_t tid;
- ETableClient() : LogEvent(EVENT_TABLECLIENT) { }
+ ETableClient() : LogEvent(EVENT_TABLECLIENT), table(0), op(0), tid(0) { }
ETableClient(int t, int o, version_t ti) :
LogEvent(EVENT_TABLECLIENT),
table(t), op(o), tid(ti) { }
- void encode(bufferlist& bl) const {
- __u8 struct_v = 2;
- ::encode(struct_v, bl);
- ::encode(stamp, bl);
- ::encode(table, bl);
- ::encode(op, bl);
- ::encode(tid, bl);
- }
- void decode(bufferlist::iterator &bl) {
- __u8 struct_v;
- ::decode(struct_v, bl);
- if (struct_v >= 2)
- ::decode(stamp, bl);
- ::decode(table, bl);
- ::decode(op, bl);
- ::decode(tid, bl);
- }
-
- void print(ostream& out) {
+ void encode(bufferlist& bl) const;
+ void decode(bufferlist::iterator& bl);
+ void dump(Formatter *f) const;
+ static void generate_test_instances(list<ETableClient*>& ls);
+
+ void print(ostream& out) const {
out << "ETableClient " << get_mdstable_name(table) << " " << get_mdstableserver_opname(op);
if (tid) out << " tid " << tid;
}
diff --git a/src/mds/events/ETableServer.h b/src/mds/events/ETableServer.h
index 6818e8557ba..132d3b6a6c9 100644
--- a/src/mds/events/ETableServer.h
+++ b/src/mds/events/ETableServer.h
@@ -30,38 +30,18 @@ struct ETableServer : public LogEvent {
version_t tid;
version_t version;
- ETableServer() : LogEvent(EVENT_TABLESERVER) { }
+ ETableServer() : LogEvent(EVENT_TABLESERVER), table(0), op(0),
+ reqid(0), bymds(0), tid(0), version(0) { }
ETableServer(int t, int o, uint64_t ri, int m, version_t ti, version_t v) :
LogEvent(EVENT_TABLESERVER),
table(t), op(o), reqid(ri), bymds(m), tid(ti), version(v) { }
- void encode(bufferlist& bl) const {
- __u8 struct_v = 2;
- ::encode(struct_v, bl);
- ::encode(stamp, bl);
- ::encode(table, bl);
- ::encode(op, bl);
- ::encode(reqid, bl);
- ::encode(bymds, bl);
- ::encode(mutation, bl);
- ::encode(tid, bl);
- ::encode(version, bl);
- }
- void decode(bufferlist::iterator &bl) {
- __u8 struct_v;
- ::decode(struct_v, bl);
- if (struct_v >= 2)
- ::decode(stamp, bl);
- ::decode(table, bl);
- ::decode(op, bl);
- ::decode(reqid, bl);
- ::decode(bymds, bl);
- ::decode(mutation, bl);
- ::decode(tid, bl);
- ::decode(version, bl);
- }
+ void encode(bufferlist& bl) const;
+ void decode(bufferlist::iterator& bl);
+ void dump(Formatter *f) const;
+ static void generate_test_instances(list<ETableServer*>& ls);
- void print(ostream& out) {
+ void print(ostream& out) const {
out << "ETableServer " << get_mdstable_name(table)
<< " " << get_mdstableserver_opname(op);
if (reqid) out << " reqid " << reqid;
diff --git a/src/mds/events/EUpdate.h b/src/mds/events/EUpdate.h
index a302a5a2b6f..645386e2511 100644
--- a/src/mds/events/EUpdate.h
+++ b/src/mds/events/EUpdate.h
@@ -27,41 +27,21 @@ public:
metareqid_t reqid;
bool had_slaves;
- EUpdate() : LogEvent(EVENT_UPDATE) { }
+ EUpdate() : LogEvent(EVENT_UPDATE), cmapv(0), had_slaves(false) { }
EUpdate(MDLog *mdlog, const char *s) :
LogEvent(EVENT_UPDATE), metablob(mdlog),
type(s), cmapv(0), had_slaves(false) { }
- void print(ostream& out) {
+ void print(ostream& out) const {
if (type.length())
out << "EUpdate " << type << " ";
out << metablob;
}
- void encode(bufferlist &bl) const {
- __u8 struct_v = 3;
- ::encode(struct_v, bl);
- ::encode(stamp, bl);
- ::encode(type, bl);
- ::encode(metablob, bl);
- ::encode(client_map, bl);
- ::encode(cmapv, bl);
- ::encode(reqid, bl);
- ::encode(had_slaves, bl);
- }
- void decode(bufferlist::iterator &bl) {
- __u8 struct_v;
- ::decode(struct_v, bl);
- if (struct_v >= 2)
- ::decode(stamp, bl);
- ::decode(type, bl);
- ::decode(metablob, bl);
- ::decode(client_map, bl);
- if (struct_v >= 3)
- ::decode(cmapv, bl);
- ::decode(reqid, bl);
- ::decode(had_slaves, bl);
- }
+ void encode(bufferlist& bl) const;
+ void decode(bufferlist::iterator& bl);
+ void dump(Formatter *f) const;
+ static void generate_test_instances(list<EUpdate*>& ls);
void update_segment();
void replay(MDS *mds);
diff --git a/src/mds/inode_backtrace.cc b/src/mds/inode_backtrace.cc
new file mode 100644
index 00000000000..c0457b28ff7
--- /dev/null
+++ b/src/mds/inode_backtrace.cc
@@ -0,0 +1,105 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "inode_backtrace.h"
+
+#include "common/Formatter.h"
+
+/* inode_backpointer_t */
+
+// Append this backpointer to bl.  The payload is dirino, dname and
+// version, in that order, bracketed by ENCODE_START(2, 2, ...) and
+// ENCODE_FINISH.
+// NOTE(review): the two 2s are presumably the struct version and the
+// oldest compatible version for the encoding macros -- confirm against
+// the macro definitions.
+void inode_backpointer_t::encode(bufferlist& bl) const
+{
+ ENCODE_START(2, 2, bl);
+ ::encode(dirino, bl);
+ ::encode(dname, bl);
+ ::encode(version, bl);
+ ENCODE_FINISH(bl);
+}
+
+// Read a backpointer from bl: dirino, dname and version, in the same
+// order encode() writes them, bracketed by
+// DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, ...) and DECODE_FINISH.
+// Input written without that framing is read with decode_old() instead.
+void inode_backpointer_t::decode(bufferlist::iterator& bl)
+{
+ DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl);
+ ::decode(dirino, bl);
+ ::decode(dname, bl);
+ ::decode(version, bl);
+ DECODE_FINISH(bl);
+}
+
+// Read a backpointer that carries no version framing: just dirino, dname
+// and version back to back.  inode_backtrace_t::decode() uses this for
+// each ancestor when the enclosing backtrace has struct_v < 4.
+void inode_backpointer_t::decode_old(bufferlist::iterator& bl)
+{
+ ::decode(dirino, bl);
+ ::decode(dname, bl);
+ ::decode(version, bl);
+}
+
+// Emit the three fields through the formatter as "dirino" (unsigned),
+// "dname" (string) and "version" (unsigned).  No enclosing section is
+// opened here; the caller provides one if it wants the fields grouped.
+void inode_backpointer_t::dump(Formatter *f) const
+{
+ f->dump_unsigned("dirino", dirino);
+ f->dump_string("dname", dname);
+ f->dump_unsigned("version", version);
+}
+
+// Append two heap-allocated sample backpointers to ls: a
+// default-constructed one, then one with every field set.
+void inode_backpointer_t::generate_test_instances(list<inode_backpointer_t*>& ls)
+{
+  // all-defaults instance
+  ls.push_back(new inode_backpointer_t);
+
+  // fully populated instance
+  inode_backpointer_t *populated = new inode_backpointer_t;
+  populated->dirino = 1;
+  populated->dname = "foo";
+  populated->version = 123;
+  ls.push_back(populated);
+}
+
+
+/*
+ * inode_backtrace_t
+ */
+
+// Append this backtrace to bl: ino followed by the ancestors vector,
+// bracketed by ENCODE_START(4, 4, ...) and ENCODE_FINISH.
+void inode_backtrace_t::encode(bufferlist& bl) const
+{
+ ENCODE_START(4, 4, bl);
+ ::encode(ino, bl);
+ ::encode(ancestors, bl);
+ ENCODE_FINISH(bl);
+}
+
+// Read a backtrace from bl, handling three generations of the format:
+//   struct_v <  3: nothing beyond the header is read; ino and ancestors
+//                  are left as they were.
+//   struct_v == 3: ino, then a __u32 count followed by that many
+//                  ancestors in the unframed decode_old() layout.
+//   struct_v >= 4: ino, then the ancestors vector via ::decode.
+// NOTE(review): struct_v is not declared in this function; presumably the
+// DECODE_START_LEGACY_COMPAT_LEN macro introduces it -- confirm.
+void inode_backtrace_t::decode(bufferlist::iterator& bl)
+{
+ DECODE_START_LEGACY_COMPAT_LEN(4, 4, 4, bl);
+ // This early return leaves the function without reaching DECODE_FINISH.
+ if (struct_v < 3)
+ return; // sorry, the old data was crap
+ ::decode(ino, bl);
+ if (struct_v >= 4) {
+ ::decode(ancestors, bl);
+ } else {
+ // Legacy layout: explicit element count, then unframed backpointers.
+ // Entries are appended; ancestors is not cleared first on this path.
+ __u32 n;
+ ::decode(n, bl);
+ while (n--) {
+ ancestors.push_back(inode_backpointer_t());
+ ancestors.back().decode_old(bl);
+ }
+ }
+ DECODE_FINISH(bl);
+}
+
+// Emit "ino" followed by an "ancestors" array that holds one
+// "backpointer" object per entry of ancestors, in vector order.
+void inode_backtrace_t::dump(Formatter *f) const
+{
+  f->dump_unsigned("ino", ino);
+
+  f->open_array_section("ancestors");
+  vector<inode_backpointer_t>::const_iterator bp = ancestors.begin();
+  while (bp != ancestors.end()) {
+    f->open_object_section("backpointer");
+    bp->dump(f);
+    f->close_section();  // backpointer
+    ++bp;
+  }
+  f->close_section();  // ancestors
+}
+
+// Append two heap-allocated sample backtraces to ls: a default-constructed
+// one, then one with ino set and a single populated ancestor.
+void inode_backtrace_t::generate_test_instances(list<inode_backtrace_t*>& ls)
+{
+  // all-defaults instance
+  ls.push_back(new inode_backtrace_t);
+
+  // instance with one ancestor
+  inode_backtrace_t *populated = new inode_backtrace_t;
+  populated->ino = 1;
+  populated->ancestors.push_back(inode_backpointer_t());
+  inode_backpointer_t& parent = populated->ancestors.back();
+  parent.dirino = 123;
+  parent.dname = "bar";
+  parent.version = 456;
+  ls.push_back(populated);
+}
+
diff --git a/src/mds/inode_backtrace.h b/src/mds/inode_backtrace.h
index 24ca30983a2..6b512913fd9 100644
--- a/src/mds/inode_backtrace.h
+++ b/src/mds/inode_backtrace.h
@@ -3,6 +3,12 @@
#ifndef CEPH_INODE_BACKTRACE_H
#define CEPH_INODE_BACKTRACE_H
+#include "mdstypes.h"
+
+namespace ceph {
+ class Formatter;
+}
+
/** metadata backpointers **/
/*
@@ -21,16 +27,11 @@ struct inode_backpointer_t {
inode_backpointer_t() : version(0) {}
inode_backpointer_t(inodeno_t i, const string &d, version_t v) : dirino(i), dname(d), version(v) {}
- void encode(bufferlist& bl) const {
- ::encode(dirino, bl);
- ::encode(dname, bl);
- ::encode(version, bl);
- }
- void decode(bufferlist::iterator& bl) {
- ::decode(dirino, bl);
- ::decode(dname, bl);
- ::decode(version, bl);
- }
+ void encode(bufferlist& bl) const;
+ void decode(bufferlist::iterator &bl);
+ void decode_old(bufferlist::iterator &bl);
+ void dump(Formatter *f) const;
+ static void generate_test_instances(list<inode_backpointer_t*>& ls);
};
WRITE_CLASS_ENCODER(inode_backpointer_t)
@@ -47,21 +48,10 @@ struct inode_backtrace_t {
inodeno_t ino; // my ino
vector<inode_backpointer_t> ancestors;
- void encode(bufferlist& bl) const {
- __u8 v = 3;
- ::encode(v, bl);
- ::encode(ino, bl);
- ::encode(ancestors, bl);
- }
-
- void decode(bufferlist::iterator& bl) {
- __u8 v;
- ::decode(v, bl);
- if (v < 3)
- return; // sorry, the old data was crap
- ::decode(ino, bl);
- ::decode(ancestors, bl);
- }
+ void encode(bufferlist& bl) const;
+ void decode(bufferlist::iterator &bl);
+ void dump(Formatter *f) const;
+ static void generate_test_instances(list<inode_backtrace_t*>& ls);
};
WRITE_CLASS_ENCODER(inode_backtrace_t)
diff --git a/src/mds/journal.cc b/src/mds/journal.cc
index 12f488c0cf1..1fb58c6b7ca 100644
--- a/src/mds/journal.cc
+++ b/src/mds/journal.cc
@@ -14,7 +14,6 @@
#include "common/config.h"
#include "osdc/Journaler.h"
-#include "events/EString.h"
#include "events/ESubtreeMap.h"
#include "events/ESession.h"
#include "events/ESessions.h"
@@ -267,16 +266,6 @@ void LogSegment::try_to_expire(MDS *mds, C_GatherBuilder &gather_bld)
// -----------------------
-// EString
-
-void EString::replay(MDS *mds)
-{
- dout(10) << "EString.replay " << event << dendl;
-}
-
-
-
-// -----------------------
// EMetaBlob
EMetaBlob::EMetaBlob(MDLog *mdlog) : opened_ino(0), renamed_dirino(0),
@@ -390,6 +379,119 @@ void EMetaBlob::update_segment(LogSegment *ls)
// ls->last_client_tid[client_reqs.rbegin()->client] = client_reqs.rbegin()->tid);
}
+// EMetaBlob::fullbit
+
+// Append this fullbit to bl inside an ENCODE_START(4, 4, ...) /
+// ENCODE_FINISH frame.  The payload is the pre-built buffer _enc when it
+// is non-empty.  Otherwise a temporary fullbit is constructed from the
+// current field values and that temporary's _enc is appended instead.
+// NOTE(review): this relies on the multi-argument fullbit constructor
+// filling in _enc -- the constructor is not visible here; confirm.
+void EMetaBlob::fullbit::encode(bufferlist& bl) const {
+ ENCODE_START(4, 4, bl);
+ if (!_enc.length()) {
+ fullbit copy(dn, dnfirst, dnlast, dnv, inode, dirfragtree, xattrs, symlink,
+ snapbl, dirty, dir_layout, &old_inodes);
+ bl.append(copy._enc);
+ } else {
+ bl.append(_enc);
+ }
+ ENCODE_FINISH(bl);
+}
+
+// Read a fullbit from bl.  Fixed fields come first (dn, dnfirst, dnlast,
+// dnv, inode, xattrs); what follows depends on the decoded inode type and
+// on struct_v:
+//   - symlink target, only when the inode is a symlink
+//   - dirfragtree and snapbl, only when the inode is a directory
+//   - optional directory layout policy (directories, struct_v >= 2)
+//   - dirty flag
+//   - optional old_inodes (struct_v >= 3)
+// NOTE(review): struct_v is presumably introduced by the
+// DECODE_START_LEGACY_COMPAT_LEN macro -- confirm.
+void EMetaBlob::fullbit::decode(bufferlist::iterator &bl) {
+ DECODE_START_LEGACY_COMPAT_LEN(4, 4, 4, bl);
+ ::decode(dn, bl);
+ ::decode(dnfirst, bl);
+ ::decode(dnlast, bl);
+ ::decode(dnv, bl);
+ ::decode(inode, bl);
+ ::decode(xattrs, bl);
+ if (inode.is_symlink())
+ ::decode(symlink, bl);
+ if (inode.is_dir()) {
+ ::decode(dirfragtree, bl);
+ ::decode(snapbl, bl);
+ if (struct_v >= 2) {
+ // A leading bool says whether a layout policy follows.
+ bool dir_layout_exists;
+ ::decode(dir_layout_exists, bl);
+ if (dir_layout_exists) {
+ // NOTE(review): dir_layout is assigned a fresh allocation without
+ // looking at its previous value; confirm who owns and frees it.
+ dir_layout = new file_layout_policy_t;
+ ::decode(*dir_layout, bl);
+ }
+ }
+ }
+ ::decode(dirty, bl);
+ if (struct_v >= 3) {
+ // A leading bool says whether old_inodes follows.
+ bool old_inodes_present;
+ ::decode(old_inodes_present, bl);
+ if (old_inodes_present) {
+ ::decode(old_inodes, bl);
+ }
+ }
+ DECODE_FINISH(bl);
+}
+
+// Dump this fullbit through the formatter: dentry name, snapid range,
+// dentry version, the inode, its xattrs, then the type-specific parts
+// (symlink target for symlinks; frag tree, snapbl presence and optional
+// layout policy for directories), the dirty flag and any old inodes.
+//
+// If only the encoded form is populated (_enc has data but dn is empty),
+// the members are first filled in by encoding and re-decoding ourselves.
+void EMetaBlob::fullbit::dump(Formatter *f) const
+{
+  if (_enc.length() && !dn.length()) {
+    /* if our bufferlist has data but our name is empty, we
+     * haven't initialized ourselves; do so in order to print members!
+     * We use const_cast here because the whole point is we aren't
+     * fully set up and this isn't changing who we "are", just our
+     * representation.
+     */
+    EMetaBlob::fullbit *me = const_cast<EMetaBlob::fullbit*>(this);
+    bufferlist encoded;
+    encode(encoded);
+    bufferlist::iterator p = encoded.begin();
+    me->decode(p);
+  }
+  f->dump_string("dentry", dn);
+  f->dump_stream("snapid.first") << dnfirst;
+  f->dump_stream("snapid.last") << dnlast;
+  f->dump_int("dentry version", dnv);
+  f->open_object_section("inode");
+  inode.dump(f);
+  f->close_section(); // inode
+  f->open_array_section("xattrs");
+  for (map<string, bufferptr>::const_iterator iter = xattrs.begin();
+       iter != xattrs.end(); ++iter) {
+    // xattr values are raw byte buffers, not NUL-terminated C strings.
+    // Build the string from an explicit length so we never scan past the
+    // end of the buffer looking for a terminator.
+    string value(iter->second.c_str(), iter->second.length());
+    f->dump_string(iter->first.c_str(), value);
+  }
+  f->close_section(); // xattrs
+  if (inode.is_symlink()) {
+    f->dump_string("symlink", symlink);
+  }
+  if (inode.is_dir()) {
+    f->dump_stream("frag tree") << dirfragtree;
+    f->dump_string("has_snapbl", snapbl.length() ? "true" : "false");
+    if (dir_layout) {
+      f->open_object_section("file layout policy");
+      dir_layout->dump(f);
+      f->close_section(); // file layout policy
+    }
+  }
+  f->dump_string("dirty", dirty ? "true" : "false");
+  if (!old_inodes.empty()) {
+    f->open_array_section("old inodes");
+    for (old_inodes_t::const_iterator iter = old_inodes.begin();
+         iter != old_inodes.end(); ++iter) {
+      f->open_object_section("inode");
+      f->dump_int("snapid", iter->first);
+      iter->second.dump(f);
+      f->close_section(); // inode
+    }
+    f->close_section(); // old inodes
+  }
+}
+
+// Append one heap-allocated sample fullbit to ls: dentry "/testdn" built
+// from a default inode and frag tree, no xattrs, an empty symlink and
+// snap blob, not dirty, and NULL for the two trailing pointer arguments.
+void EMetaBlob::fullbit::generate_test_instances(list<EMetaBlob::fullbit*>& ls)
+{
+  inode_t blank_inode;
+  fragtree_t blank_fragtree;
+  map<string,bufferptr> no_xattrs;
+  bufferlist no_snapbl;
+  ls.push_back(new fullbit("/testdn", 0, 0, 0,
+                           blank_inode, blank_fragtree, no_xattrs, "", no_snapbl,
+                           false, NULL, NULL));
+}
+
void EMetaBlob::fullbit::update_inode(MDS *mds, CInode *in)
{
in->inode = inode;
@@ -417,6 +519,366 @@ void EMetaBlob::fullbit::update_inode(MDS *mds, CInode *in)
in->old_inodes = old_inodes;
}
+// EMetaBlob::remotebit
+/* Append the encoded form of this remotebit to bl, wrapped in an
+ * ENCODE_START(2, 2, ...)/ENCODE_FINISH section.
+ *
+ * When the cached _enc buffer is empty, a temporary remotebit is built from
+ * the current member values and its _enc is appended; otherwise our own
+ * _enc is appended unchanged.
+ * NOTE(review): this relies on the remotebit constructor populating _enc --
+ * confirm against the class definition.
+ */
+void EMetaBlob::remotebit::encode(bufferlist& bl) const
+{
+ ENCODE_START(2, 2, bl);
+ if (!_enc.length()) {
+ remotebit copy(dn, dnfirst, dnlast, dnv, ino, d_type, dirty);
+ bl.append(copy._enc);
+ } else {
+ bl.append(_enc);
+ }
+ ENCODE_FINISH(bl);
+}
+/* Decode a remotebit from bl.  Between the DECODE_START_LEGACY_COMPAT_LEN
+ * and DECODE_FINISH markers the members are read in the fixed order dn,
+ * dnfirst, dnlast, dnv, ino, d_type, dirty.
+ */
+void EMetaBlob::remotebit::decode(bufferlist::iterator &bl)
+{
+ DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl);
+ ::decode(dn, bl);
+ ::decode(dnfirst, bl);
+ ::decode(dnlast, bl);
+ ::decode(dnv, bl);
+ ::decode(ino, bl);
+ ::decode(d_type, bl);
+ ::decode(dirty, bl);
+ DECODE_FINISH(bl);
+}
+
+/**
+ * Dump this remotebit through f: dentry name, snapid range, dentry
+ * version, inode number, a textual d_type and the dirty flag.
+ *
+ * If only the encoded buffer is populated (dn still empty), the members
+ * are first filled in by round-tripping through encode()/decode().
+ *
+ * Every file type that has an S_IF* constant is given a name; a d_type
+ * outside that set still trips the assert.
+ */
+void EMetaBlob::remotebit::dump(Formatter *f) const
+{
+  if (_enc.length() && !dn.length()) {
+    /* if our bufferlist has data but our name is empty, we
+     * haven't initialized ourselves; do so in order to print members!
+     * We use const_cast here because the whole point is we aren't
+     * fully set up and this isn't changing who we "are", just our
+     * representation.
+     */
+    EMetaBlob::remotebit *me = const_cast<EMetaBlob::remotebit*>(this);
+    bufferlist encoded;
+    encode(encoded);
+    bufferlist::iterator p = encoded.begin();
+    me->decode(p);
+  }
+  f->dump_string("dentry", dn);
+  f->dump_int("snapid.first", dnfirst);
+  f->dump_int("snapid.last", dnlast);
+  f->dump_int("dentry version", dnv);
+  f->dump_int("inodeno", ino);
+  uint32_t type = DTTOIF(d_type) & S_IFMT; // convert to type entries
+  string type_string;
+  switch(type) {
+  case S_IFREG:
+    type_string = "file"; break;
+  case S_IFLNK:
+    type_string = "symlink"; break;
+  case S_IFDIR:
+    type_string = "directory"; break;
+  // remote links may also point at special files; name them instead of
+  // falling into the assert below
+  case S_IFIFO:
+    type_string = "fifo"; break;
+  case S_IFCHR:
+    type_string = "chr"; break;
+  case S_IFBLK:
+    type_string = "blk"; break;
+  case S_IFSOCK:
+    type_string = "sock"; break;
+  default:
+    assert (0 == "unknown d_type!");
+  }
+  f->dump_string("d_type", type_string);
+  f->dump_string("dirty", dirty ? "true" : "false");
+}
+
+/**
+ * Append one sample remotebit (a clean regular-file link named
+ * "/test/dn") to ls.  The instance is allocated with new.
+ */
+void EMetaBlob::remotebit::
+generate_test_instances(list<EMetaBlob::remotebit*>& ls)
+{
+  ls.push_back(new remotebit("/test/dn", 0, 10, 15, 1, IFTODT(S_IFREG), false));
+}
+
+// EMetaBlob::nullbit
+/* Append the encoded form of this nullbit to bl inside an
+ * ENCODE_START(2, 2, ...)/ENCODE_FINISH section.
+ *
+ * An empty _enc means there is no cached encoding, so a temporary nullbit
+ * is built from the current members and its _enc is appended; otherwise
+ * our own _enc is appended as-is.
+ * NOTE(review): relies on the nullbit constructor populating _enc --
+ * confirm against the class definition.
+ */
+void EMetaBlob::nullbit::encode(bufferlist& bl) const
+{
+ ENCODE_START(2, 2, bl);
+ if (!_enc.length()) {
+ nullbit copy(dn, dnfirst, dnlast, dnv, dirty);
+ bl.append(copy._enc);
+ } else {
+ bl.append(_enc);
+ }
+ ENCODE_FINISH(bl);
+}
+/* Decode a nullbit from bl.  Members are read in the fixed order dn,
+ * dnfirst, dnlast, dnv, dirty.
+ */
+void EMetaBlob::nullbit::decode(bufferlist::iterator &bl)
+{
+ DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl);
+ ::decode(dn, bl);
+ ::decode(dnfirst, bl);
+ ::decode(dnlast, bl);
+ ::decode(dnv, bl);
+ ::decode(dirty, bl);
+ DECODE_FINISH(bl);
+}
+
+/**
+ * Dump this nullbit through f: dentry name, snapid range, dentry version
+ * and the dirty flag (as the string "true" or "false").
+ */
+void EMetaBlob::nullbit::dump(Formatter *f) const
+{
+  const bool only_encoded = _enc.length() && !dn.length();
+  if (only_encoded) {
+    // The encoded buffer holds data but the name is still empty, so the
+    // members were never filled in.  Round-trip through encode()/decode()
+    // to populate them; constness is cast away because this changes only
+    // our representation, not what we describe.
+    bufferlist scratch;
+    encode(scratch);
+    bufferlist::iterator pos = scratch.begin();
+    const_cast<EMetaBlob::nullbit*>(this)->decode(pos);
+  }
+
+  f->dump_string("dentry", dn);
+  f->dump_int("snapid.first", dnfirst);
+  f->dump_int("snapid.last", dnlast);
+  f->dump_int("dentry version", dnv);
+  f->dump_string("dirty", dirty ? "true" : "false");
+}
+
+/**
+ * Append two sample nullbits to ls: a clean one, then a dirty one.
+ * Both are allocated with new before either is pushed.
+ */
+void EMetaBlob::nullbit::generate_test_instances(list<nullbit*>& ls)
+{
+  nullbit *clean_bit = new nullbit("/test/dentry", 0, 10, 15, false);
+  nullbit *dirty_bit = new nullbit("/test/dirty", 10, 20, 25, true);
+
+  ls.push_back(clean_bit);
+  ls.push_back(dirty_bit);
+}
+
+// EMetaBlob::dirlump
+/* Append this dirlump to bl: fnode, state and the three bit counts, then
+ * the dnbl buffer.  _encode_bits() is called immediately before dnbl is
+ * written.
+ * NOTE(review): presumably _encode_bits() is what brings dnbl up to date
+ * with the in-memory bit lists -- confirm against its definition.
+ */
+void EMetaBlob::dirlump::encode(bufferlist& bl) const
+{
+ ENCODE_START(2, 2, bl);
+ ::encode(fnode, bl);
+ ::encode(state, bl);
+ ::encode(nfull, bl);
+ ::encode(nremote, bl);
+ ::encode(nnull, bl);
+ _encode_bits();
+ ::encode(dnbl, bl);
+ ENCODE_FINISH(bl);
+}
+
+/**
+ * Decode a dirlump from bl: fnode, state, the three bit counts and the raw
+ * dnbl buffer, in that order.
+ *
+ * The individual bits inside dnbl are not decoded here; dn_decoded is
+ * cleared so that they are only unpacked when something asks for them.
+ */
+void EMetaBlob::dirlump::decode(bufferlist::iterator &bl)
+{
+  // terminated with ';' like every other decoder in this file
+  DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl);
+  ::decode(fnode, bl);
+  ::decode(state, bl);
+  ::decode(nfull, bl);
+  ::decode(nremote, bl);
+  ::decode(nnull, bl);
+  ::decode(dnbl, bl);
+  dn_decoded = false;      // don't decode bits unless we need them.
+  DECODE_FINISH(bl);
+}
+
+/**
+ * Dump this dirlump through f: the fnode, the state string, the three bit
+ * counts, then one array each for the full, remote and null bits.
+ */
+void EMetaBlob::dirlump::dump(Formatter *f) const
+{
+  // dump() is const, but the bit lists may not have been unpacked yet;
+  // cast constness away to run the pending decode before walking them.
+  if (!dn_decoded)
+    const_cast<dirlump*>(this)->_decode_bits();
+
+  f->open_object_section("fnode");
+  fnode.dump(f);
+  f->close_section(); // fnode
+
+  f->dump_string("state", state_string());
+  f->dump_int("nfull", nfull);
+  f->dump_int("nremote", nremote);
+  f->dump_int("nnull", nnull);
+
+  f->open_array_section("full bits");
+  list<std::tr1::shared_ptr<fullbit> >::const_iterator full_it;
+  for (full_it = dfull.begin(); full_it != dfull.end(); ++full_it) {
+    f->open_object_section("fullbit");
+    (*full_it)->dump(f);
+    f->close_section(); // fullbit
+  }
+  f->close_section(); // full bits
+
+  f->open_array_section("remote bits");
+  list<remotebit>::const_iterator remote_it;
+  for (remote_it = dremote.begin(); remote_it != dremote.end(); ++remote_it) {
+    f->open_object_section("remotebit");
+    remote_it->dump(f);
+    f->close_section(); // remotebit
+  }
+  f->close_section(); // remote bits
+
+  f->open_array_section("null bits");
+  list<nullbit>::const_iterator null_it;
+  for (null_it = dnull.begin(); null_it != dnull.end(); ++null_it) {
+    f->open_object_section("null bit");
+    null_it->dump(f);
+    f->close_section(); // null bit
+  }
+  f->close_section(); // null bits
+}
+
+/**
+ * Append a single default-constructed dirlump (allocated with new) to ls.
+ */
+void EMetaBlob::dirlump::generate_test_instances(list<dirlump*>& ls)
+{
+  dirlump *sample = new dirlump();
+  ls.push_back(sample);
+}
+
+/**
+ * EMetaBlob proper
+ *
+ * encode(): append every EMetaBlob member to bl inside an
+ * ENCODE_START(5, 5, ...)/ENCODE_FINISH section.  The order written here
+ * (lump_order first, renamed_dir_frags last) is the order decode() reads.
+ */
+void EMetaBlob::encode(bufferlist& bl) const
+{
+ ENCODE_START(5, 5, bl);
+ ::encode(lump_order, bl);
+ ::encode(lump_map, bl);
+ ::encode(roots, bl);
+ ::encode(table_tids, bl);
+ ::encode(opened_ino, bl);
+ ::encode(allocated_ino, bl);
+ ::encode(used_preallocated_ino, bl);
+ ::encode(preallocated_inos, bl);
+ ::encode(client_name, bl);
+ ::encode(inotablev, bl);
+ ::encode(sessionmapv, bl);
+ ::encode(truncate_start, bl);
+ ::encode(truncate_finish, bl);
+ ::encode(destroyed_inodes, bl);
+ ::encode(client_reqs, bl);
+ ::encode(renamed_dirino, bl);
+ ::encode(renamed_dir_frags, bl);
+ ENCODE_FINISH(bl);
+}
+void EMetaBlob::decode(bufferlist::iterator &bl)
+{ // Decode an EMetaBlob from bl; three members have a fallback path for older struct_v values.
+ DECODE_START_LEGACY_COMPAT_LEN(5, 5, 5, bl); // NOTE(review): struct_v is not declared here; presumably this macro introduces it -- confirm
+ ::decode(lump_order, bl);
+ ::decode(lump_map, bl);
+ if (struct_v >= 4) {
+ ::decode(roots, bl);
+ } else {
+ // before struct_v 4: a single bufferlist that, when non-empty, holds one fullbit
+ bufferlist rootbl;
+ ::decode(rootbl, bl);
+ if (rootbl.length()) {
+ bufferlist::iterator p = rootbl.begin();
+ roots.push_back(std::tr1::shared_ptr<fullbit>(new fullbit(p)));
+ }
+ }
+ ::decode(table_tids, bl);
+ ::decode(opened_ino, bl);
+ ::decode(allocated_ino, bl);
+ ::decode(used_preallocated_ino, bl);
+ ::decode(preallocated_inos, bl);
+ ::decode(client_name, bl);
+ ::decode(inotablev, bl);
+ ::decode(sessionmapv, bl);
+ ::decode(truncate_start, bl);
+ ::decode(truncate_finish, bl);
+ ::decode(destroyed_inodes, bl);
+ if (struct_v >= 2) {
+ ::decode(client_reqs, bl);
+ } else {
+ // before struct_v 2: a plain list of request ids; each is paired with 0
+ list<metareqid_t> r;
+ ::decode(r, bl);
+ while (!r.empty()) {
+ client_reqs.push_back(pair<metareqid_t,uint64_t>(r.front(), 0));
+ r.pop_front();
+ }
+ }
+ if (struct_v >= 3) { // rename info is only read from struct_v 3 on
+ ::decode(renamed_dirino, bl);
+ ::decode(renamed_dir_frags, bl);
+ }
+ DECODE_FINISH(bl);
+}
+
+/**
+ * Dump this EMetaBlob through f.
+ *
+ * Emitted in order: the dirlumps (walked in lump_order), the roots, the
+ * table transactions, the rename info, the inotable/session map versions,
+ * the inode allocation info, the client name, the truncate lists, the
+ * destroyed inodes and the client requests.
+ *
+ * NOTE(review): a few section/key names below are misspelled; they are
+ * emitted exactly as before since consumers may match on them.
+ */
+void EMetaBlob::dump(Formatter *f) const
+{
+  f->open_array_section("lumps");
+  for (list<dirfrag_t>::const_iterator df = lump_order.begin();
+       df != lump_order.end(); ++df) {
+    f->open_object_section("lump");
+
+    f->open_object_section("dirfrag");
+    f->dump_stream("dirfrag") << *df;
+    f->close_section(); // dirfrag
+
+    f->open_object_section("dirlump");
+    lump_map.at(*df).dump(f);
+    f->close_section(); // dirlump
+
+    f->close_section(); // lump
+  }
+  f->close_section(); // lumps
+
+  f->open_array_section("roots");
+  for (list<std::tr1::shared_ptr<fullbit> >::const_iterator root = roots.begin();
+       root != roots.end(); ++root) {
+    f->open_object_section("root");
+    (*root)->dump(f);
+    f->close_section(); // root
+  }
+  f->close_section(); // roots
+
+  f->open_array_section("tableclient tranactions");
+  for (list<pair<__u8,version_t> >::const_iterator txn = table_tids.begin();
+       txn != table_tids.end(); ++txn) {
+    f->open_object_section("transaction");
+    f->dump_int("tid", txn->first);
+    f->dump_int("version", txn->second);
+    f->close_section(); // transaction
+  }
+  f->close_section(); // tableclient transactions
+
+  f->dump_int("renamed directory inodeno", renamed_dirino);
+
+  f->open_array_section("renamed directory fragments");
+  for (list<frag_t>::const_iterator fg = renamed_dir_frags.begin();
+       fg != renamed_dir_frags.end(); ++fg) {
+    f->dump_int("frag", *fg);
+  }
+  f->close_section(); // renamed directory fragments
+
+  f->dump_int("inotable version", inotablev);
+  f->dump_int("SesionMap version", sessionmapv);
+  f->dump_int("allocated ino", allocated_ino);
+
+  f->dump_stream("preallocated inos") << preallocated_inos;
+  f->dump_int("used preallocated ino", used_preallocated_ino);
+
+  f->open_object_section("client name");
+  client_name.dump(f);
+  f->close_section(); // client name
+
+  f->open_array_section("inodes starting a truncate");
+  for (list<inodeno_t>::const_iterator ts = truncate_start.begin();
+       ts != truncate_start.end(); ++ts) {
+    f->dump_int("inodeno", *ts);
+  }
+  f->close_section(); // truncate inodes
+
+  f->open_array_section("inodes finishing a truncated");
+  for (map<inodeno_t,uint64_t>::const_iterator tf = truncate_finish.begin();
+       tf != truncate_finish.end(); ++tf) {
+    f->open_object_section("inode+segment");
+    f->dump_int("inodeno", tf->first);
+    f->dump_int("truncate starting segment", tf->second);
+    f->close_section(); // truncated inode
+  }
+  f->close_section(); // truncate finish inodes
+
+  f->open_array_section("destroyed inodes");
+  for (vector<inodeno_t>::const_iterator gone = destroyed_inodes.begin();
+       gone != destroyed_inodes.end(); ++gone) {
+    f->dump_int("inodeno", *gone);
+  }
+  f->close_section(); // destroyed inodes
+
+  f->open_array_section("client requests");
+  for (list<pair<metareqid_t,uint64_t> >::const_iterator req = client_reqs.begin();
+       req != client_reqs.end(); ++req) {
+    f->open_object_section("Client request");
+    f->dump_stream("request ID") << req->first;
+    f->dump_int("oldest request on client", req->second);
+    f->close_section(); // request
+  }
+  f->close_section(); // client requests
+}
+
+/**
+ * Append a single default-constructed EMetaBlob (allocated with new) to ls.
+ */
+void EMetaBlob::generate_test_instances(list<EMetaBlob*>& ls)
+{
+  EMetaBlob *sample = new EMetaBlob();
+  ls.push_back(sample);
+}
+
void EMetaBlob::replay(MDS *mds, LogSegment *logseg, MDSlaveUpdate *slaveup)
{
dout(10) << "EMetaBlob.replay " << lump_map.size() << " dirlumps by " << client_name << dendl;
@@ -782,9 +1244,9 @@ void EMetaBlob::replay(MDS *mds, LogSegment *logseg, MDSlaveUpdate *slaveup)
<< dendl;
Session *session = mds->sessionmap.get_session(client_name);
assert(session);
- dout(20) << " (session prealloc " << session->prealloc_inos << ")" << dendl;
+ dout(20) << " (session prealloc " << session->info.prealloc_inos << ")" << dendl;
if (used_preallocated_ino) {
- if (session->prealloc_inos.empty()) {
+ if (session->info.prealloc_inos.empty()) {
// HRM: badness in the journal
mds->clog.warn() << " replayed op " << client_reqs << " on session for " << client_name
<< " with empty prealloc_inos\n";
@@ -795,12 +1257,12 @@ void EMetaBlob::replay(MDS *mds, LogSegment *logseg, MDSlaveUpdate *slaveup)
mds->clog.warn() << " replayed op " << client_reqs << " used ino " << i
<< " but session next is " << next << "\n";
assert(i == used_preallocated_ino);
- session->used_inos.clear();
+ session->info.used_inos.clear();
}
mds->sessionmap.projected = ++mds->sessionmap.version;
}
if (preallocated_inos.size()) {
- session->prealloc_inos.insert(preallocated_inos);
+ session->info.prealloc_inos.insert(preallocated_inos);
mds->sessionmap.projected = ++mds->sessionmap.version;
}
assert(sessionmapv == mds->sessionmap.version);
@@ -878,16 +1340,16 @@ void ESession::replay(MDS *mds)
if (open) {
session = mds->sessionmap.get_or_add_session(client_inst);
mds->sessionmap.set_state(session, Session::STATE_OPEN);
- dout(10) << " opened session " << session->inst << dendl;
+ dout(10) << " opened session " << session->info.inst << dendl;
} else {
session = mds->sessionmap.get_session(client_inst.name);
if (session) { // there always should be a session, but there's a bug
if (session->connection == NULL) {
- dout(10) << " removed session " << session->inst << dendl;
+ dout(10) << " removed session " << session->info.inst << dendl;
mds->sessionmap.remove_session(session);
} else {
session->clear(); // the client has reconnected; keep the Session, but reset
- dout(10) << " reset session " << session->inst << " (they reconnected)" << dendl;
+ dout(10) << " reset session " << session->info.inst << " (they reconnected)" << dendl;
}
} else {
mds->clog.error() << "replayed stray Session close event for " << client_inst
@@ -912,6 +1374,95 @@ void ESession::replay(MDS *mds)
update_segment();
}
+void ESession::encode(bufferlist &bl) const
+{ // Append this ESession to bl; the order below is the order decode() reads.
+ ENCODE_START(3, 3, bl);
+ ::encode(stamp, bl);
+ ::encode(client_inst, bl);
+ ::encode(open, bl);
+ ::encode(cmapv, bl);
+ ::encode(inos, bl);
+ ::encode(inotablev, bl);
+ ENCODE_FINISH(bl);
+}
+/* Decode an ESession from bl.  stamp is read only when struct_v >= 2; the
+ * remaining members are always read, in the order client_inst, open,
+ * cmapv, inos, inotablev.
+ * NOTE(review): struct_v is not declared here; presumably it is introduced
+ * by the DECODE_START_LEGACY_COMPAT_LEN macro -- confirm.
+ */
+void ESession::decode(bufferlist::iterator &bl)
+{
+ DECODE_START_LEGACY_COMPAT_LEN(3, 3, 3, bl);
+ if (struct_v >= 2)
+ ::decode(stamp, bl);
+ ::decode(client_inst, bl);
+ ::decode(open, bl);
+ ::decode(cmapv, bl);
+ ::decode(inos, bl);
+ ::decode(inotablev, bl);
+ DECODE_FINISH(bl);
+}
+
+/**
+ * Dump this ESession through f: client instance, open flag (as "true" or
+ * "false"), client map version, inos and inotable version.  stamp is not
+ * emitted.
+ */
+void ESession::dump(Formatter *f) const
+{
+  const char *open_text = open ? "true" : "false";
+
+  f->dump_stream("client instance") << client_inst;
+  f->dump_string("open", open_text);
+  f->dump_int("client map version", cmapv);
+  f->dump_stream("inos") << inos;
+  f->dump_int("inotable version", inotablev);
+}
+
+/**
+ * Append a single ESession (allocated with new) to ls.
+ */
+void ESession::generate_test_instances(list<ESession*>& ls)
+{
+  ESession *sample = new ESession;
+  ls.push_back(sample);
+}
+
+// -----------------------
+// ESessions
+/* Append this ESessions to bl inside an ENCODE_START(1, 1, ...) section:
+ * client_map, cmapv, then stamp.
+ */
+void ESessions::encode(bufferlist &bl) const
+{
+ ENCODE_START(1, 1, bl);
+ ::encode(client_map, bl);
+ ::encode(cmapv, bl);
+ ::encode(stamp, bl);
+ ENCODE_FINISH(bl);
+}
+/* Decode the old layout: client_map and cmapv are read straight from bl
+ * with no DECODE_START/DECODE_FINISH wrapper, and stamp is read only if
+ * bl is not already at its end.
+ */
+void ESessions::decode_old(bufferlist::iterator &bl)
+{
+ ::decode(client_map, bl);
+ ::decode(cmapv, bl);
+ if (!bl.end())
+ ::decode(stamp, bl);
+}
+/* Decode the new layout: the same members as decode_old(), but wrapped in
+ * DECODE_START(1, ...)/DECODE_FINISH.  stamp is still read only if bl is
+ * not at its end.
+ * NOTE(review): bl.end() tests the iterator passed in, not the bounds of
+ * the versioned section -- confirm this is the intended check.
+ */
+void ESessions::decode_new(bufferlist::iterator &bl)
+{
+ DECODE_START(1, bl);
+ ::decode(client_map, bl);
+ ::decode(cmapv, bl);
+ if (!bl.end())
+ ::decode(stamp, bl);
+ DECODE_FINISH(bl);
+}
+
+/**
+ * Dump this ESessions through f: the client map version, then an array
+ * with one object per client_map entry (client id and entity).
+ */
+void ESessions::dump(Formatter *f) const
+{
+  f->dump_int("client map version", cmapv);
+
+  f->open_array_section("client map");
+  map<client_t,entity_inst_t>::const_iterator entry;
+  for (entry = client_map.begin(); entry != client_map.end(); ++entry) {
+    f->open_object_section("client");
+    f->dump_int("client id", entry->first.v);
+    f->dump_stream("client entity") << entry->second;
+    f->close_section(); // client
+  }
+  f->close_section(); // client map
+}
+
+/**
+ * Append a single default-constructed ESessions (allocated with new) to ls.
+ */
+void ESessions::generate_test_instances(list<ESessions*>& ls)
+{
+  ESessions *sample = new ESessions();
+  ls.push_back(sample);
+}
+
void ESessions::update_segment()
{
_segment->sessionmapv = cmapv;
@@ -933,6 +1484,52 @@ void ESessions::replay(MDS *mds)
}
+// -----------------------
+// ETableServer
+/* Append this ETableServer to bl inside an ENCODE_START(3, 3, ...)
+ * section: stamp, table, op, reqid, bymds, mutation, tid, version.
+ */
+void ETableServer::encode(bufferlist& bl) const
+{
+ ENCODE_START(3, 3, bl);
+ ::encode(stamp, bl);
+ ::encode(table, bl);
+ ::encode(op, bl);
+ ::encode(reqid, bl);
+ ::encode(bymds, bl);
+ ::encode(mutation, bl);
+ ::encode(tid, bl);
+ ::encode(version, bl);
+ ENCODE_FINISH(bl);
+}
+/* Decode an ETableServer from bl.  stamp is read only when struct_v >= 2;
+ * the other members are always read, in the same order encode() writes
+ * them.
+ */
+void ETableServer::decode(bufferlist::iterator &bl)
+{
+ DECODE_START_LEGACY_COMPAT_LEN(3, 3, 3, bl);
+ if (struct_v >= 2)
+ ::decode(stamp, bl);
+ ::decode(table, bl);
+ ::decode(op, bl);
+ ::decode(reqid, bl);
+ ::decode(bymds, bl);
+ ::decode(mutation, bl);
+ ::decode(tid, bl);
+ ::decode(version, bl);
+ DECODE_FINISH(bl);
+}
+
+/**
+ * Dump this ETableServer through f as six integers: table id, op,
+ * request id, originating mds, tid and version.  stamp and mutation are
+ * not emitted.
+ */
+void ETableServer::dump(Formatter *f) const
+{
+  // what the event targets
+  f->dump_int("table id", table);
+  f->dump_int("op", op);
+
+  // who asked for it
+  f->dump_int("request id", reqid);
+  f->dump_int("by mds", bymds);
+
+  // where it sits in the table's history
+  f->dump_int("tid", tid);
+  f->dump_int("version", version);
+}
+
+/**
+ * Append a single default-constructed ETableServer (allocated with new)
+ * to ls.
+ */
+void ETableServer::generate_test_instances(list<ETableServer*>& ls)
+{
+  ETableServer *sample = new ETableServer();
+  ls.push_back(sample);
+}
void ETableServer::update_segment()
@@ -981,6 +1578,42 @@ void ETableServer::replay(MDS *mds)
}
+// ---------------------
+// ETableClient
+/* Append this ETableClient to bl inside an ENCODE_START(3, 3, ...)
+ * section: stamp, table, op, tid.
+ */
+void ETableClient::encode(bufferlist& bl) const
+{
+ ENCODE_START(3, 3, bl);
+ ::encode(stamp, bl);
+ ::encode(table, bl);
+ ::encode(op, bl);
+ ::encode(tid, bl);
+ ENCODE_FINISH(bl);
+}
+/* Decode an ETableClient from bl.  stamp is read only when struct_v >= 2;
+ * table, op and tid are always read.
+ */
+void ETableClient::decode(bufferlist::iterator &bl)
+{
+ DECODE_START_LEGACY_COMPAT_LEN(3, 3, 3, bl);
+ if (struct_v >= 2)
+ ::decode(stamp, bl);
+ ::decode(table, bl);
+ ::decode(op, bl);
+ ::decode(tid, bl);
+ DECODE_FINISH(bl);
+}
+
+/**
+ * Dump this ETableClient through f as three integers: table, op and tid.
+ * stamp is not emitted.
+ */
+void ETableClient::dump(Formatter *f) const
+{
+  f->dump_int("table", table);
+  f->dump_int("op", op);
+  f->dump_int("tid", tid);
+}
+
+/**
+ * Append a single default-constructed ETableClient (allocated with new)
+ * to ls.
+ */
+void ETableClient::generate_test_instances(list<ETableClient*>& ls)
+{
+  ETableClient *sample = new ETableClient();
+  ls.push_back(sample);
+}
+
void ETableClient::replay(MDS *mds)
{
dout(10) << " ETableClient.replay " << get_mdstable_name(table)
@@ -1030,6 +1663,53 @@ void ESnap::replay(MDS *mds)
// -----------------------
// EUpdate
+void EUpdate::encode(bufferlist &bl) const
+{ // Append this EUpdate to bl; the order below is the order decode() reads.
+ ENCODE_START(4, 4, bl);
+ ::encode(stamp, bl);
+ ::encode(type, bl);
+ ::encode(metablob, bl);
+ ::encode(client_map, bl);
+ ::encode(cmapv, bl);
+ ::encode(reqid, bl);
+ ::encode(had_slaves, bl);
+ ENCODE_FINISH(bl);
+}
+/* Decode an EUpdate from bl.  stamp is read only when struct_v >= 2 and
+ * cmapv only when struct_v >= 3; type, metablob, client_map, reqid and
+ * had_slaves are always read.
+ */
+void EUpdate::decode(bufferlist::iterator &bl)
+{
+ DECODE_START_LEGACY_COMPAT_LEN(4, 4, 4, bl);
+ if (struct_v >= 2)
+ ::decode(stamp, bl);
+ ::decode(type, bl);
+ ::decode(metablob, bl);
+ ::decode(client_map, bl);
+ if (struct_v >= 3)
+ ::decode(cmapv, bl);
+ ::decode(reqid, bl);
+ ::decode(had_slaves, bl);
+ DECODE_FINISH(bl);
+}
+
+/**
+ * Dump this EUpdate through f: the nested metablob first, then the type
+ * string, the length of client_map (not its contents), the client map
+ * version, the request id and whether slaves were involved.
+ */
+void EUpdate::dump(Formatter *f) const
+{
+  f->open_object_section("metablob");
+  metablob.dump(f);
+  f->close_section(); // metablob
+
+  const char *slaves_text = had_slaves ? "true" : "false";
+
+  f->dump_string("type", type);
+  f->dump_int("client map length", client_map.length());
+  f->dump_int("client map version", cmapv);
+  f->dump_stream("reqid") << reqid;
+  f->dump_string("had slaves", slaves_text);
+}
+
+/**
+ * Append a single default-constructed EUpdate (allocated with new) to ls.
+ */
+void EUpdate::generate_test_instances(list<EUpdate*>& ls)
+{
+  EUpdate *sample = new EUpdate();
+  ls.push_back(sample);
+}
+
+
void EUpdate::update_segment()
{
metablob.update_segment(_segment);
@@ -1074,6 +1754,43 @@ void EUpdate::replay(MDS *mds)
// ------------------------
// EOpen
+void EOpen::encode(bufferlist &bl) const { // Append stamp, metablob and inos to bl, in that order.
+ ENCODE_START(3, 3, bl);
+ ::encode(stamp, bl);
+ ::encode(metablob, bl);
+ ::encode(inos, bl);
+ ENCODE_FINISH(bl);
+}
+/* Decode an EOpen from bl.  stamp is read only when struct_v >= 2;
+ * metablob and inos are always read.
+ */
+void EOpen::decode(bufferlist::iterator &bl) {
+ DECODE_START_LEGACY_COMPAT_LEN(3, 3, 3, bl);
+ if (struct_v >= 2)
+ ::decode(stamp, bl);
+ ::decode(metablob, bl);
+ ::decode(inos, bl);
+ DECODE_FINISH(bl);
+}
+
+/**
+ * Dump this EOpen through f: the nested metablob, then an array holding
+ * every entry of inos.
+ */
+void EOpen::dump(Formatter *f) const
+{
+  f->open_object_section("metablob");
+  metablob.dump(f);
+  f->close_section(); // metablob
+
+  f->open_array_section("inos involved");
+  vector<inodeno_t>::const_iterator ino_it;
+  for (ino_it = inos.begin(); ino_it != inos.end(); ++ino_it) {
+    f->dump_int("ino", *ino_it);
+  }
+  f->close_section(); // inos
+}
+
+/**
+ * Append two EOpen samples to ls: a default-constructed one, then one
+ * that has had add_ino(0) called on it.
+ */
+void EOpen::generate_test_instances(list<EOpen*>& ls)
+{
+  ls.push_back(new EOpen());
+
+  EOpen *with_ino = new EOpen();
+  ls.push_back(with_ino);
+  with_ino->add_ino(0);
+}
+
void EOpen::update_segment()
{
// ??
@@ -1112,11 +1829,266 @@ void ECommitted::replay(MDS *mds)
}
}
+void ECommitted::encode(bufferlist& bl) const
+{ // Append stamp, then reqid, to bl.
+ ENCODE_START(3, 3, bl);
+ ::encode(stamp, bl);
+ ::encode(reqid, bl);
+ ENCODE_FINISH(bl);
+}
+void ECommitted::decode(bufferlist::iterator& bl)
+{ // Decode an ECommitted from bl; reqid is always read.
+ DECODE_START_LEGACY_COMPAT_LEN(3, 3, 3, bl);
+ if (struct_v >= 2) // stamp is only read from struct_v 2 on
+ ::decode(stamp, bl);
+ ::decode(reqid, bl);
+ DECODE_FINISH(bl);
+}
+
+/**
+ * Dump this ECommitted through f: stamp and reqid, both via their stream
+ * operators.
+ */
+void ECommitted::dump(Formatter *f) const
+{
+  f->dump_stream("stamp") << stamp;
+  f->dump_stream("reqid") << reqid;
+}
+
+/**
+ * Append two ECommitted samples to ls: an untouched one, then one with
+ * stamp and reqid set to fixed values.
+ */
+void ECommitted::generate_test_instances(list<ECommitted*>& ls)
+{
+  ls.push_back(new ECommitted);
+
+  ECommitted *filled = new ECommitted;
+  ls.push_back(filled);
+  filled->stamp = utime_t(1, 2);
+  filled->reqid = metareqid_t(entity_name_t::CLIENT(123), 456);
+}
// -----------------------
// ESlaveUpdate
+void link_rollback::encode(bufferlist &bl) const
+{ // Append this link_rollback to bl; the order below is the order decode() reads.
+ ENCODE_START(2, 2, bl);
+ ::encode(reqid, bl);
+ ::encode(ino, bl);
+ ::encode(was_inc, bl);
+ ::encode(old_ctime, bl);
+ ::encode(old_dir_mtime, bl);
+ ::encode(old_dir_rctime, bl);
+ ENCODE_FINISH(bl);
+}
+/* Decode a link_rollback from bl: reqid, ino, was_inc, old_ctime,
+ * old_dir_mtime, old_dir_rctime, unconditionally and in that order.
+ */
+void link_rollback::decode(bufferlist::iterator &bl)
+{
+ DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl);
+ ::decode(reqid, bl);
+ ::decode(ino, bl);
+ ::decode(was_inc, bl);
+ ::decode(old_ctime, bl);
+ ::decode(old_dir_mtime, bl);
+ ::decode(old_dir_rctime, bl);
+ DECODE_FINISH(bl);
+}
+
+/**
+ * Dump this link_rollback through f: request id, inode number, whether
+ * the link count was incremented (as "true" or "false") and the three
+ * saved timestamps.
+ */
+void link_rollback::dump(Formatter *f) const
+{
+  const char *inc_text = was_inc ? "true" : "false";
+
+  f->dump_stream("metareqid") << reqid;
+  f->dump_int("ino", ino);
+  f->dump_string("was incremented", inc_text);
+
+  // timestamps saved for the rollback
+  f->dump_stream("old_ctime") << old_ctime;
+  f->dump_stream("old_dir_mtime") << old_dir_mtime;
+  f->dump_stream("old_dir_rctime") << old_dir_rctime;
+}
+
+/**
+ * Append a single default-constructed link_rollback (allocated with new)
+ * to ls.
+ */
+void link_rollback::generate_test_instances(list<link_rollback*>& ls)
+{
+  link_rollback *sample = new link_rollback();
+  ls.push_back(sample);
+}
+/* Append this rmdir_rollback to bl inside an ENCODE_START(2, 2, ...)
+ * section: reqid, src_dir, src_dname, dest_dir, dest_dname.
+ */
+void rmdir_rollback::encode(bufferlist& bl) const
+{
+ ENCODE_START(2, 2, bl);
+ ::encode(reqid, bl);
+ ::encode(src_dir, bl);
+ ::encode(src_dname, bl);
+ ::encode(dest_dir, bl);
+ ::encode(dest_dname, bl);
+ ENCODE_FINISH(bl);
+}
+/* Decode a rmdir_rollback from bl: reqid, src_dir, src_dname, dest_dir,
+ * dest_dname, unconditionally and in that order.
+ */
+void rmdir_rollback::decode(bufferlist::iterator& bl)
+{
+ DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl);
+ ::decode(reqid, bl);
+ ::decode(src_dir, bl);
+ ::decode(src_dname, bl);
+ ::decode(dest_dir, bl);
+ ::decode(dest_dname, bl);
+ DECODE_FINISH(bl);
+}
+
+/**
+ * Dump this rmdir_rollback through f: the request id, then directory and
+ * dentry name for the source followed by the destination.
+ */
+void rmdir_rollback::dump(Formatter *f) const
+{
+  f->dump_stream("metareqid") << reqid;
+
+  // source side
+  f->dump_stream("source directory") << src_dir;
+  f->dump_string("source dname", src_dname);
+
+  // destination side
+  f->dump_stream("destination directory") << dest_dir;
+  f->dump_string("destination dname", dest_dname);
+}
+
+/**
+ * Append a single default-constructed rmdir_rollback (allocated with new)
+ * to ls.
+ */
+void rmdir_rollback::generate_test_instances(list<rmdir_rollback*>& ls)
+{
+  rmdir_rollback *sample = new rmdir_rollback();
+  ls.push_back(sample);
+}
+/* Append this drec to bl inside an ENCODE_START(2, 2, ...) section:
+ * dirfrag, dirfrag_old_mtime, dirfrag_old_rctime, ino, remote_ino, dname,
+ * remote_d_type, old_ctime.
+ */
+void rename_rollback::drec::encode(bufferlist &bl) const
+{
+ ENCODE_START(2, 2, bl);
+ ::encode(dirfrag, bl);
+ ::encode(dirfrag_old_mtime, bl);
+ ::encode(dirfrag_old_rctime, bl);
+ ::encode(ino, bl);
+ ::encode(remote_ino, bl);
+ ::encode(dname, bl);
+ ::encode(remote_d_type, bl);
+ ::encode(old_ctime, bl);
+ ENCODE_FINISH(bl);
+}
+/* Decode a drec from bl; every member is read unconditionally, in the
+ * same order encode() writes them.
+ */
+void rename_rollback::drec::decode(bufferlist::iterator &bl)
+{
+ DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl);
+ ::decode(dirfrag, bl);
+ ::decode(dirfrag_old_mtime, bl);
+ ::decode(dirfrag_old_rctime, bl);
+ ::decode(ino, bl);
+ ::decode(remote_ino, bl);
+ ::decode(dname, bl);
+ ::decode(remote_d_type, bl);
+ ::decode(old_ctime, bl);
+ DECODE_FINISH(bl);
+}
+
+/**
+ * Dump this drec through f: the directory fragment and its saved
+ * mtime/rctime, the inode numbers, the dentry name, a textual form of
+ * remote_d_type and the saved ctime.
+ *
+ * Every file type that has an S_IF* constant is given a name.  Any other
+ * value is reported as "unknown" rather than asserting, so that dumping a
+ * record cannot abort the process.
+ * NOTE(review): presumably remote_d_type is left at zero when the record
+ * has no remote link -- confirm against the struct's constructor.
+ */
+void rename_rollback::drec::dump(Formatter *f) const
+{
+  f->dump_stream("directory fragment") << dirfrag;
+  f->dump_stream("directory old mtime") << dirfrag_old_mtime;
+  f->dump_stream("directory old rctime") << dirfrag_old_rctime;
+  f->dump_int("ino", ino);
+  f->dump_int("remote ino", remote_ino);
+  f->dump_string("dname", dname);
+  uint32_t type = DTTOIF(remote_d_type) & S_IFMT; // convert to type entries
+  string type_string;
+  switch(type) {
+  case S_IFREG:
+    type_string = "file"; break;
+  case S_IFLNK:
+    type_string = "symlink"; break;
+  case S_IFDIR:
+    type_string = "directory"; break;
+  case S_IFIFO:
+    type_string = "fifo"; break;
+  case S_IFCHR:
+    type_string = "chr"; break;
+  case S_IFBLK:
+    type_string = "blk"; break;
+  case S_IFSOCK:
+    type_string = "sock"; break;
+  default:
+    // unset or unrecognised type: still produce a dump
+    type_string = "unknown"; break;
+  }
+  f->dump_string("remote dtype", type_string);
+  f->dump_stream("old ctime") << old_ctime;
+}
+
+/**
+ * Append one drec sample to ls: default-constructed, then given a
+ * regular-file remote_d_type.
+ */
+void rename_rollback::drec::generate_test_instances(list<drec*>& ls)
+{
+  drec *sample = new drec();
+  ls.push_back(sample);
+  sample->remote_d_type = IFTODT(S_IFREG);
+}
+/* Append this rename_rollback to bl inside an ENCODE_START(2, 2, ...)
+ * section: reqid, the three drecs (orig_src, orig_dest, stray), then
+ * ctime.  The drecs go through the unqualified encode(), reqid and ctime
+ * through ::encode().
+ */
+void rename_rollback::encode(bufferlist &bl) const
+{
+ ENCODE_START(2, 2, bl);
+ ::encode(reqid, bl);
+ encode(orig_src, bl);
+ encode(orig_dest, bl);
+ encode(stray, bl);
+ ::encode(ctime, bl);
+ ENCODE_FINISH(bl);
+}
+/* Decode a rename_rollback from bl: reqid, orig_src, orig_dest, stray,
+ * ctime, unconditionally and in that order.
+ */
+void rename_rollback::decode(bufferlist::iterator &bl)
+{
+ DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl);
+ ::decode(reqid, bl);
+ decode(orig_src, bl);
+ decode(orig_dest, bl);
+ decode(stray, bl);
+ ::decode(ctime, bl);
+ DECODE_FINISH(bl);
+}
+
+/**
+ * Dump this rename_rollback through f: the request id, one object section
+ * for each of the three drecs (original source, original destination,
+ * stray), then ctime.
+ */
+void rename_rollback::dump(Formatter *f) const
+{
+  f->dump_stream("request id") << reqid;
+
+  f->open_object_section("original src drec");
+  orig_src.dump(f);
+  f->close_section(); // original src drec
+
+  f->open_object_section("original dest drec");
+  orig_dest.dump(f);
+  f->close_section(); // original dest drec
+
+  f->open_object_section("stray drec");
+  stray.dump(f);
+  f->close_section(); // stray drec
+
+  f->dump_stream("ctime") << ctime;
+}
+
+/**
+ * Append one rename_rollback sample to ls: default-constructed, then each
+ * of its three drecs is given a regular-file remote_d_type.
+ */
+void rename_rollback::generate_test_instances(list<rename_rollback*>& ls)
+{
+  rename_rollback *sample = new rename_rollback();
+  ls.push_back(sample);
+
+  sample->orig_src.remote_d_type = IFTODT(S_IFREG);
+  sample->orig_dest.remote_d_type = IFTODT(S_IFREG);
+  sample->stray.remote_d_type = IFTODT(S_IFREG);
+}
+/* Append this ESlaveUpdate to bl inside an ENCODE_START(3, 3, ...)
+ * section: stamp, type, reqid, master, op, origop, commit, rollback.
+ */
+void ESlaveUpdate::encode(bufferlist &bl) const
+{
+ ENCODE_START(3, 3, bl);
+ ::encode(stamp, bl);
+ ::encode(type, bl);
+ ::encode(reqid, bl);
+ ::encode(master, bl);
+ ::encode(op, bl);
+ ::encode(origop, bl);
+ ::encode(commit, bl);
+ ::encode(rollback, bl);
+ ENCODE_FINISH(bl);
+}
+/* Decode an ESlaveUpdate from bl.  stamp is read only when struct_v >= 2;
+ * the other members are always read, in the same order encode() writes
+ * them.
+ */
+void ESlaveUpdate::decode(bufferlist::iterator &bl)
+{
+ DECODE_START_LEGACY_COMPAT_LEN(3, 3, 3, bl);
+ if (struct_v >= 2)
+ ::decode(stamp, bl);
+ ::decode(type, bl);
+ ::decode(reqid, bl);
+ ::decode(master, bl);
+ ::decode(op, bl);
+ ::decode(origop, bl);
+ ::decode(commit, bl);
+ ::decode(rollback, bl);
+ DECODE_FINISH(bl);
+}
+
+/**
+ * Dump this ESlaveUpdate through f: the commit blob (in a section named
+ * "metablob"), the length of the rollback buffer (not its contents), the
+ * type string, the request id, and the master/op/origop integers.
+ */
+void ESlaveUpdate::dump(Formatter *f) const
+{
+  // the commit blob is emitted under the generic "metablob" name
+  f->open_object_section("metablob");
+  commit.dump(f);
+  f->close_section(); // metablob
+
+  f->dump_int("rollback length", rollback.length());
+  f->dump_string("type", type);
+  f->dump_stream("metareqid") << reqid;
+
+  f->dump_int("master", master);
+  f->dump_int("op", op);
+  f->dump_int("original op", origop);
+}
+
+/**
+ * Append a single default-constructed ESlaveUpdate (allocated with new)
+ * to ls.
+ */
+void ESlaveUpdate::generate_test_instances(list<ESlaveUpdate*>& ls)
+{
+  ESlaveUpdate *sample = new ESlaveUpdate();
+  ls.push_back(sample);
+}
+
+
void ESlaveUpdate::replay(MDS *mds)
{
MDSlaveUpdate *su;
@@ -1158,6 +2130,65 @@ void ESlaveUpdate::replay(MDS *mds)
// -----------------------
// ESubtreeMap
+void ESubtreeMap::encode(bufferlist& bl) const
+{ // Append stamp, metablob, subtrees, ambiguous_subtrees and expire_pos to bl, in that order.
+ ENCODE_START(5, 5, bl);
+ ::encode(stamp, bl);
+ ::encode(metablob, bl);
+ ::encode(subtrees, bl);
+ ::encode(ambiguous_subtrees, bl);
+ ::encode(expire_pos, bl);
+ ENCODE_FINISH(bl);
+}
+/* Decode an ESubtreeMap from bl.  metablob and subtrees are always read;
+ * stamp needs struct_v >= 2, ambiguous_subtrees needs struct_v >= 4 and
+ * expire_pos needs struct_v >= 3.
+ */
+void ESubtreeMap::decode(bufferlist::iterator &bl)
+{
+ DECODE_START_LEGACY_COMPAT_LEN(5, 5, 5, bl);
+ if (struct_v >= 2)
+ ::decode(stamp, bl);
+ ::decode(metablob, bl);
+ ::decode(subtrees, bl);
+ if (struct_v >= 4)
+ ::decode(ambiguous_subtrees, bl);
+ if (struct_v >= 3)
+ ::decode(expire_pos, bl);
+ DECODE_FINISH(bl);
+}
+
+/**
+ * Dump this ESubtreeMap through f: the nested metablob, an array of
+ * subtrees (each with its root dirfrag followed by its bound dirfrags),
+ * an array of ambiguous subtrees, and the expire position.
+ */
+void ESubtreeMap::dump(Formatter *f) const
+{
+  f->open_object_section("metablob");
+  metablob.dump(f);
+  f->close_section(); // metablob
+
+  f->open_array_section("subtrees");
+  map<dirfrag_t,vector<dirfrag_t> >::const_iterator tree;
+  for (tree = subtrees.begin(); tree != subtrees.end(); ++tree) {
+    const vector<dirfrag_t>& bounds_of_tree = tree->second;
+
+    f->open_object_section("tree");
+    f->dump_stream("root dirfrag") << tree->first;
+    for (vector<dirfrag_t>::const_iterator bound = bounds_of_tree.begin();
+         bound != bounds_of_tree.end(); ++bound) {
+      f->dump_stream("bound dirfrag") << *bound;
+    }
+    f->close_section(); // tree
+  }
+  f->close_section(); // subtrees
+
+  f->open_array_section("ambiguous subtrees");
+  set<dirfrag_t>::const_iterator amb;
+  for (amb = ambiguous_subtrees.begin(); amb != ambiguous_subtrees.end(); ++amb) {
+    f->dump_stream("dirfrag") << *amb;
+  }
+  f->close_section(); // ambiguous subtrees
+
+  f->dump_int("expire position", expire_pos);
+}
+
+/**
+ * Append a single default-constructed ESubtreeMap (allocated with new)
+ * to ls.
+ */
+void ESubtreeMap::generate_test_instances(list<ESubtreeMap*>& ls)
+{
+  ESubtreeMap *sample = new ESubtreeMap();
+  ls.push_back(sample);
+}
+
void ESubtreeMap::replay(MDS *mds)
{
if (expire_pos && expire_pos > mds->mdlog->journaler->get_expire_pos())
@@ -1326,6 +2357,51 @@ void EFragment::replay(MDS *mds)
in->verify_dirfrags();
}
+void EFragment::encode(bufferlist &bl) const { // Append stamp, op, ino, basefrag, bits and metablob to bl, in that order.
+ ENCODE_START(4, 4, bl);
+ ::encode(stamp, bl);
+ ::encode(op, bl);
+ ::encode(ino, bl);
+ ::encode(basefrag, bl);
+ ::encode(bits, bl);
+ ::encode(metablob, bl);
+ ENCODE_FINISH(bl);
+}
+/* Decode an EFragment from bl.  stamp is read only when struct_v >= 2.
+ * op is read when struct_v >= 3; for older encodings it is set to
+ * OP_ONESHOT instead.  ino, basefrag, bits and metablob are always read.
+ */
+void EFragment::decode(bufferlist::iterator &bl) {
+ DECODE_START_LEGACY_COMPAT_LEN(4, 4, 4, bl);
+ if (struct_v >= 2)
+ ::decode(stamp, bl);
+ if (struct_v >= 3)
+ ::decode(op, bl);
+ else
+ op = OP_ONESHOT;
+ ::decode(ino, bl);
+ ::decode(basefrag, bl);
+ ::decode(bits, bl);
+ ::decode(metablob, bl);
+ DECODE_FINISH(bl);
+}
+
+/**
+ * Dump this EFragment through f: the op name, the inode, the base frag
+ * and the bit count.  The metablob is deliberately left out.
+ */
+void EFragment::dump(Formatter *f) const
+{
+  // metablob is not dumped here (the original note reads: "sadly we
+  // don't have this; dunno if we'll get it")
+  f->dump_string("op", op_name(op));
+  f->dump_stream("ino") << ino;
+  f->dump_stream("base frag") << basefrag;
+  f->dump_int("bits", bits);
+}
+
+/**
+ * Append two EFragment samples to ls: an untouched one, then one with op,
+ * ino and bits set to fixed values.
+ */
+void EFragment::generate_test_instances(list<EFragment*>& ls)
+{
+  ls.push_back(new EFragment);
+
+  EFragment *prepared = new EFragment;
+  ls.push_back(prepared);
+  prepared->op = OP_PREPARE;
+  prepared->ino = 1;
+  prepared->bits = 5;
+}
@@ -1359,6 +2435,47 @@ void EExport::replay(MDS *mds)
mds->mdcache->try_trim_non_auth_subtree(dir);
}
+void EExport::encode(bufferlist& bl) const
+{ // Append stamp, metablob, base and bounds to bl, in that order.
+ ENCODE_START(3, 3, bl);
+ ::encode(stamp, bl);
+ ::encode(metablob, bl);
+ ::encode(base, bl);
+ ::encode(bounds, bl);
+ ENCODE_FINISH(bl);
+}
+/* Decode an EExport from bl.  stamp is read only when struct_v >= 2;
+ * metablob, base and bounds are always read.
+ */
+void EExport::decode(bufferlist::iterator &bl)
+{
+ DECODE_START_LEGACY_COMPAT_LEN(3, 3, 3, bl);
+ if (struct_v >= 2)
+ ::decode(stamp, bl);
+ ::decode(metablob, bl);
+ ::decode(base, bl);
+ ::decode(bounds, bl);
+ DECODE_FINISH(bl);
+}
+
+/**
+ * Dump this EExport through f: stamp (converted to double), the base
+ * dirfrag, and an array of the bounding dirfrags.  The metablob is
+ * deliberately left out.
+ */
+void EExport::dump(Formatter *f) const
+{
+  f->dump_float("stamp", (double)stamp);
+
+  // metablob is not dumped here (the original note reads: "sadly we
+  // don't have this; dunno if we'll get it")
+  f->dump_stream("base dirfrag") << base;
+
+  f->open_array_section("bounds dirfrags");
+  set<dirfrag_t>::const_iterator bound;
+  for (bound = bounds.begin(); bound != bounds.end(); ++bound) {
+    f->dump_stream("dirfrag") << *bound;
+  }
+  f->close_section(); // bounds dirfrags
+}
+
+/**
+ * Append a single default-constructed EExport (allocated with new) to ls.
+ */
+void EExport::generate_test_instances(list<EExport*>& ls)
+{
+  ls.push_back(new EExport());
+}
// -----------------------
@@ -1400,6 +2517,45 @@ void EImportStart::replay(MDS *mds)
update_segment();
}
+void EImportStart::encode(bufferlist &bl) const { // Append stamp, base, metablob, bounds, cmapv and client_map to bl, in that order.
+ ENCODE_START(3, 3, bl);
+ ::encode(stamp, bl);
+ ::encode(base, bl);
+ ::encode(metablob, bl);
+ ::encode(bounds, bl);
+ ::encode(cmapv, bl);
+ ::encode(client_map, bl);
+ ENCODE_FINISH(bl);
+}
+/* Decode an EImportStart from bl.  stamp is read only when struct_v >= 2;
+ * base, metablob, bounds, cmapv and client_map are always read.
+ */
+void EImportStart::decode(bufferlist::iterator &bl) {
+ DECODE_START_LEGACY_COMPAT_LEN(3, 3, 3, bl);
+ if (struct_v >= 2)
+ ::decode(stamp, bl);
+ ::decode(base, bl);
+ ::decode(metablob, bl);
+ ::decode(bounds, bl);
+ ::decode(cmapv, bl);
+ ::decode(client_map, bl);
+ DECODE_FINISH(bl);
+}
+
+/**
+ * Dump this EImportStart through f: the base dirfrag and an array of the
+ * boundary dirfrags.  The other members are not emitted.
+ */
+void EImportStart::dump(Formatter *f) const
+{
+  f->dump_stream("base dirfrag") << base;
+
+  f->open_array_section("boundary dirfrags");
+  vector<dirfrag_t>::const_iterator bound;
+  for (bound = bounds.begin(); bound != bounds.end(); ++bound) {
+    f->dump_stream("frag") << *bound;
+  }
+  f->close_section(); // boundary dirfrags
+}
+
+/**
+ * Append a single EImportStart (allocated with new) to ls.
+ */
+void EImportStart::generate_test_instances(list<EImportStart*>& ls)
+{
+  EImportStart *sample = new EImportStart;
+  ls.push_back(sample);
+}
+
// -----------------------
// EImportFinish
@@ -1426,11 +2582,65 @@ void EImportFinish::replay(MDS *mds)
}
}
+void EImportFinish::encode(bufferlist& bl) const
+{ // Append stamp, base and success to bl, in that order.
+ ENCODE_START(3, 3, bl);
+ ::encode(stamp, bl);
+ ::encode(base, bl);
+ ::encode(success, bl);
+ ENCODE_FINISH(bl);
+}
+/* Decode an EImportFinish from bl.  stamp is read only when
+ * struct_v >= 2; base and success are always read.
+ */
+void EImportFinish::decode(bufferlist::iterator &bl)
+{
+ DECODE_START_LEGACY_COMPAT_LEN(3, 3, 3, bl);
+ if (struct_v >= 2)
+ ::decode(stamp, bl);
+ ::decode(base, bl);
+ ::decode(success, bl);
+ DECODE_FINISH(bl);
+}
+
+/**
+ * Dump this EImportFinish through f: the base dirfrag and the success
+ * flag (as "true" or "false").
+ */
+void EImportFinish::dump(Formatter *f) const
+{
+  const char *success_text = success ? "true" : "false";
+
+  f->dump_stream("base dirfrag") << base;
+  f->dump_string("success", success_text);
+}
+/**
+ * Append two EImportFinish samples to ls: an untouched one, then one with
+ * success set to true.
+ */
+void EImportFinish::generate_test_instances(list<EImportFinish*>& ls)
+{
+  ls.push_back(new EImportFinish);
+
+  EImportFinish *succeeded = new EImportFinish;
+  ls.push_back(succeeded);
+  succeeded->success = true;
+}
// ------------------------
// EResetJournal
+void EResetJournal::encode(bufferlist& bl) const  // Serializes this event into bl; stamp is the only field written.
+{
+ ENCODE_START(2, 2, bl);
+ ::encode(stamp, bl);
+ ENCODE_FINISH(bl);
+}
+
+void EResetJournal::decode(bufferlist::iterator &bl)  // Reads stamp back from bl.
+{
+ DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl);
+ ::decode(stamp, bl);  // read unconditionally, whatever version was decoded
+ DECODE_FINISH(bl);
+}
+
+void EResetJournal::dump(Formatter *f) const  // Writes stamp to f under the key "timestamp".
+{
+ f->dump_stream("timestamp") << stamp;
+}
+
+void EResetJournal::generate_test_instances(list<EResetJournal*>& ls)  // Appends one default-constructed EResetJournal to ls.
+{
+ ls.push_back(new EResetJournal());  // raw pointer is handed to ls; not deleted here
+}
+
void EResetJournal::replay(MDS *mds)
{
dout(1) << "EResetJournal" << dendl;
diff --git a/src/mds/mdstypes.cc b/src/mds/mdstypes.cc
new file mode 100644
index 00000000000..6b87c221e56
--- /dev/null
+++ b/src/mds/mdstypes.cc
@@ -0,0 +1,892 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "mdstypes.h"
+#include "common/Formatter.h"
+
+/*
+ * file_layout_policy_t
+ */
+
+void file_layout_policy_t::encode(bufferlist &bl) const  // Serializes the wrapped layout into bl.
+{
+ ENCODE_START(2, 2, bl);
+ ::encode(layout, bl);  // layout is the only field written
+ ENCODE_FINISH(bl);
+}
+
+void file_layout_policy_t::decode(bufferlist::iterator& bl)  // Reads layout back from bl.
+{
+ DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl);
+ ::decode(layout, bl);
+ DECODE_FINISH(bl);
+}
+
+void dump(const ceph_file_layout& l, Formatter *f)  // Writes the fields of l to f; no section is opened or closed here.
+{
+ f->dump_unsigned("stripe_unit", l.fl_stripe_unit);  // the first three fields are always emitted
+ f->dump_unsigned("stripe_count", l.fl_stripe_count);
+ f->dump_unsigned("object_size", l.fl_object_size);
+ if (l.fl_cas_hash)  // the remaining three fields are emitted only when non-zero
+ f->dump_unsigned("cas_hash", l.fl_cas_hash);
+ if (l.fl_object_stripe_unit)
+ f->dump_unsigned("object_stripe_unit", l.fl_object_stripe_unit);
+ if (l.fl_pg_pool)
+ f->dump_unsigned("pg_pool", l.fl_pg_pool);
+}
+
+void dump(const ceph_dir_layout& l, Formatter *f)  // Writes l.dl_dir_hash to f as "dir_hash"; no other field is emitted.
+{
+ f->dump_unsigned("dir_hash", l.dl_dir_hash);
+}
+
+void file_layout_policy_t::dump(Formatter *f) const  // Dumps the wrapped layout via the free function ::dump.
+{
+ ::dump(layout, f);  // leading :: selects the global overload rather than this member
+}
+
+void file_layout_policy_t::generate_test_instances(list<file_layout_policy_t*>& ls)  // Appends a default instance and a populated one.
+{
+ ls.push_back(new file_layout_policy_t);  // first instance is left default-constructed
+ ls.push_back(new file_layout_policy_t);
+ ls.back()->layout.fl_stripe_unit = 1024;  // every assignment below targets the second instance
+ ls.back()->layout.fl_stripe_count = 2;
+ ls.back()->layout.fl_object_size = 2048;
+ ls.back()->layout.fl_cas_hash = 3;
+ ls.back()->layout.fl_object_stripe_unit = 8;
+ ls.back()->layout.fl_pg_pool = 9;
+}
+
+
+/*
+ * frag_info_t
+ */
+
+void frag_info_t::encode(bufferlist &bl) const  // Serializes version, mtime, nfiles and nsubdirs into bl.
+{
+ ENCODE_START(2, 2, bl);
+ ::encode(version, bl);  // order has to stay in step with decode()
+ ::encode(mtime, bl);
+ ::encode(nfiles, bl);
+ ::encode(nsubdirs, bl);
+ ENCODE_FINISH(bl);
+}
+
+void frag_info_t::decode(bufferlist::iterator &bl)  // Reads the four fields back in encode() order.
+{
+ DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl);
+ ::decode(version, bl);
+ ::decode(mtime, bl);
+ ::decode(nfiles, bl);
+ ::decode(nsubdirs, bl);
+ DECODE_FINISH(bl);
+}
+
+void frag_info_t::dump(Formatter *f) const  // Writes all four fields to f; no section is opened here.
+{
+ f->dump_unsigned("version", version);
+ f->dump_stream("mtime") << mtime;
+ f->dump_unsigned("num_files", nfiles);  // output keys differ from the member names (nfiles, nsubdirs)
+ f->dump_unsigned("num_subdirs", nsubdirs);
+}
+
+void frag_info_t::generate_test_instances(list<frag_info_t*>& ls)  // Appends a default instance and a populated one.
+{
+ ls.push_back(new frag_info_t);  // first instance is left default-constructed
+ ls.push_back(new frag_info_t);
+ ls.back()->version = 1;  // every assignment below targets the second instance
+ ls.back()->mtime = utime_t(2, 3);
+ ls.back()->nfiles = 4;
+ ls.back()->nsubdirs = 5;
+}
+
+ostream& operator<<(ostream &out, const frag_info_t &f)  // Streams a compact one-line form of f and returns out.
+{
+ if (f == frag_info_t())  // a value equal to a default-constructed one prints as "f()"
+ return out << "f()";
+ out << "f(v" << f.version;
+ if (f.mtime != utime_t())  // mtime is omitted when it equals a default utime_t
+ out << " m" << f.mtime;
+ if (f.nfiles || f.nsubdirs)  // counts are printed as size()=nfiles+nsubdirs, only when either is non-zero
+ out << " " << f.size() << "=" << f.nfiles << "+" << f.nsubdirs;
+ out << ")";
+ return out;
+}
+
+
+/*
+ * nest_info_t
+ */
+
+void nest_info_t::encode(bufferlist &bl) const  // Serializes version, the five r* counters and rctime into bl.
+{
+ ENCODE_START(2, 2, bl);
+ ::encode(version, bl);  // order has to stay in step with decode()
+ ::encode(rbytes, bl);
+ ::encode(rfiles, bl);
+ ::encode(rsubdirs, bl);
+ ::encode(ranchors, bl);
+ ::encode(rsnaprealms, bl);
+ ::encode(rctime, bl);
+ ENCODE_FINISH(bl);
+}
+
+void nest_info_t::decode(bufferlist::iterator &bl)  // Reads the seven fields back in encode() order.
+{
+ DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl);
+ ::decode(version, bl);
+ ::decode(rbytes, bl);
+ ::decode(rfiles, bl);
+ ::decode(rsubdirs, bl);
+ ::decode(ranchors, bl);
+ ::decode(rsnaprealms, bl);
+ ::decode(rctime, bl);
+ DECODE_FINISH(bl);
+}
+
+void nest_info_t::dump(Formatter *f) const  // Writes all seven fields to f; no section is opened here.
+{
+ f->dump_unsigned("version", version);
+ f->dump_unsigned("rbytes", rbytes);
+ f->dump_unsigned("rfiles", rfiles);
+ f->dump_unsigned("rsubdirs", rsubdirs);
+ f->dump_unsigned("ranchors", ranchors);
+ f->dump_unsigned("rsnaprealms", rsnaprealms);
+ f->dump_stream("rctime") << rctime;  // rctime is the only field written through a stream
+}
+
+void nest_info_t::generate_test_instances(list<nest_info_t*>& ls)  // Appends a default instance and a populated one.
+{
+ ls.push_back(new nest_info_t);  // first instance is left default-constructed
+ ls.push_back(new nest_info_t);
+ ls.back()->version = 1;  // every assignment below targets the second instance
+ ls.back()->rbytes = 2;
+ ls.back()->rfiles = 3;
+ ls.back()->rsubdirs = 4;
+ ls.back()->ranchors = 5;
+ ls.back()->rsnaprealms = 6;
+ ls.back()->rctime = utime_t(7, 8);
+}
+
+ostream& operator<<(ostream &out, const nest_info_t &n)  // Streams a compact one-line form of n and returns out.
+{
+ if (n == nest_info_t())  // a value equal to a default-constructed one prints as "n()"
+ return out << "n()";
+ out << "n(v" << n.version;
+ if (n.rctime != utime_t())  // each optional part below is printed only when non-default / non-zero
+ out << " rc" << n.rctime;
+ if (n.rbytes)
+ out << " b" << n.rbytes;
+ if (n.ranchors)
+ out << " a" << n.ranchors;
+ if (n.rsnaprealms)
+ out << " sr" << n.rsnaprealms;
+ if (n.rfiles || n.rsubdirs)  // printed as rsize()=rfiles+rsubdirs
+ out << " " << n.rsize() << "=" << n.rfiles << "+" << n.rsubdirs;
+ out << ")";
+ return out;
+}
+
+
+/*
+ * client_writeable_range_t
+ */
+
+void client_writeable_range_t::encode(bufferlist &bl) const  // Serializes range.first, range.last and follows into bl.
+{
+ ENCODE_START(2, 2, bl);
+ ::encode(range.first, bl);  // the two range members are written individually, not via a byte_range_t encoder
+ ::encode(range.last, bl);
+ ::encode(follows, bl);
+ ENCODE_FINISH(bl);
+}
+
+void client_writeable_range_t::decode(bufferlist::iterator& bl)  // Reads the three fields back in encode() order.
+{
+ DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl);
+ ::decode(range.first, bl);
+ ::decode(range.last, bl);
+ ::decode(follows, bl);
+ DECODE_FINISH(bl);
+}
+
+void client_writeable_range_t::dump(Formatter *f) const  // Writes the range as a nested "byte range" object, then follows.
+{
+ f->open_object_section("byte range");
+ f->dump_unsigned("first", range.first);
+ f->dump_unsigned("last", range.last);
+ f->close_section();  // closes "byte range"
+ f->dump_unsigned("follows", follows);  // emitted beside the object, not inside it
+}
+
+void client_writeable_range_t::generate_test_instances(list<client_writeable_range_t*>& ls)  // Appends a default instance and a populated one.
+{
+ ls.push_back(new client_writeable_range_t);  // first instance is left default-constructed
+ ls.push_back(new client_writeable_range_t);
+ ls.back()->range.first = 123;  // every assignment below targets the second instance
+ ls.back()->range.last = 456;
+ ls.back()->follows = 12;
+}
+
+ostream& operator<<(ostream& out, const client_writeable_range_t& r)  // Streams r as first-last@follows and returns out.
+{
+ return out << r.range.first << '-' << r.range.last << "@" << r.follows;
+}
+
+
+/*
+ * inode_t
+ */
+void inode_t::encode(bufferlist &bl) const  // Serializes every inode field into bl; order has to stay in step with decode().
+{
+ ENCODE_START(6, 6, bl);
+
+ ::encode(ino, bl);
+ ::encode(rdev, bl);
+ ::encode(ctime, bl);
+
+ ::encode(mode, bl);
+ ::encode(uid, bl);
+ ::encode(gid, bl);
+
+ ::encode(nlink, bl);
+ ::encode(anchored, bl);
+
+ ::encode(dir_layout, bl);  // decode() reads this field only for struct_v >= 4
+ ::encode(layout, bl);
+ ::encode(size, bl);
+ ::encode(truncate_seq, bl);
+ ::encode(truncate_size, bl);
+ ::encode(truncate_from, bl);
+ ::encode(truncate_pending, bl);  // decode() reads this field only for struct_v >= 5
+ ::encode(mtime, bl);
+ ::encode(atime, bl);
+ ::encode(time_warp_seq, bl);
+ ::encode(client_ranges, bl);
+
+ ::encode(dirstat, bl);
+ ::encode(rstat, bl);
+ ::encode(accounted_rstat, bl);
+
+ ::encode(version, bl);
+ ::encode(file_data_version, bl);
+ ::encode(xattr_version, bl);
+ ::encode(last_renamed_version, bl);  // decode() reads this field only for struct_v >= 2
+
+ ENCODE_FINISH(bl);
+}
+
+void inode_t::decode(bufferlist::iterator &p)  // Reads an inode from p, branching on struct_v for fields older encodings lack.
+{
+ DECODE_START_LEGACY_COMPAT_LEN(6, 6, 6, p);  // supplies struct_v, which is not declared in this function
+
+ ::decode(ino, p);
+ ::decode(rdev, p);
+ ::decode(ctime, p);
+
+ ::decode(mode, p);
+ ::decode(uid, p);
+ ::decode(gid, p);
+
+ ::decode(nlink, p);
+ ::decode(anchored, p);
+
+ if (struct_v >= 4)
+ ::decode(dir_layout, p);
+ else
+ memset(&dir_layout, 0, sizeof(dir_layout));  // older encodings carry no dir_layout; zero it instead
+ ::decode(layout, p);
+ ::decode(size, p);
+ ::decode(truncate_seq, p);
+ ::decode(truncate_size, p);
+ ::decode(truncate_from, p);
+ if (struct_v >= 5)
+ ::decode(truncate_pending, p);
+ else
+ truncate_pending = 0;  // older encodings carry no truncate_pending
+ ::decode(mtime, p);
+ ::decode(atime, p);
+ ::decode(time_warp_seq, p);
+ if (struct_v >= 3) {
+ ::decode(client_ranges, p);
+ } else {
+ map<client_t, client_writeable_range_t::byte_range_t> m;  // struct_v < 3 stored bare byte ranges per client
+ ::decode(m, p);
+ for (map<client_t, client_writeable_range_t::byte_range_t>::iterator
+ q = m.begin(); q != m.end(); q++)
+ client_ranges[q->first].range = q->second;  // only .range is assigned; follows is not set on this path
+ }
+
+ ::decode(dirstat, p);
+ ::decode(rstat, p);
+ ::decode(accounted_rstat, p);
+
+ ::decode(version, p);
+ ::decode(file_data_version, p);
+ ::decode(xattr_version, p);
+ if (struct_v >= 2)  // last_renamed_version is left untouched when struct_v < 2
+ ::decode(last_renamed_version, p);
+
+ DECODE_FINISH(p);
+}
+
+void inode_t::dump(Formatter *f) const  // Writes every inode field to f; nested structures get their own sections.
+{
+ f->dump_unsigned("ino", ino);
+ f->dump_unsigned("rdev", rdev);
+ f->dump_stream("ctime") << ctime;
+ f->dump_unsigned("mode", mode);
+ f->dump_unsigned("uid", uid);
+ f->dump_unsigned("gid", gid);
+ f->dump_unsigned("nlink", nlink);
+ f->dump_unsigned("anchored", (int)anchored);  // anchored is cast to int before being dumped
+
+ f->open_object_section("dir_layout");
+ ::dump(dir_layout, f);  // global ::dump overloads emit the layout fields
+ f->close_section();
+
+ f->open_object_section("layout");
+ ::dump(layout, f);
+ f->close_section();
+
+ f->dump_unsigned("size", size);
+ f->dump_unsigned("truncate_seq", truncate_seq);
+ f->dump_unsigned("truncate_size", truncate_size);
+ f->dump_unsigned("truncate_from", truncate_from);
+ f->dump_unsigned("truncate_pending", truncate_pending);
+ f->dump_stream("mtime") << mtime;
+ f->dump_stream("atime") << atime;
+ f->dump_unsigned("time_warp_seq", time_warp_seq);
+
+ f->open_array_section("client_ranges");
+ for (map<client_t,client_writeable_range_t>::const_iterator p = client_ranges.begin(); p != client_ranges.end(); ++p) {
+ f->open_object_section("client");  // one "client" object per map entry
+ f->dump_unsigned("client", p->first.v);
+ p->second.dump(f);
+ f->close_section();
+ }
+ f->close_section();  // closes "client_ranges"
+
+ f->open_object_section("dirstat");
+ dirstat.dump(f);
+ f->close_section();
+
+ f->open_object_section("rstat");
+ rstat.dump(f);
+ f->close_section();
+
+ f->open_object_section("accounted_rstat");
+ accounted_rstat.dump(f);
+ f->close_section();
+
+ f->dump_unsigned("version", version);
+ f->dump_unsigned("file_data_version", file_data_version);
+ f->dump_unsigned("xattr_version", xattr_version);
+ f->dump_unsigned("last_renamed_version", last_renamed_version);
+}
+
+void inode_t::generate_test_instances(list<inode_t*>& ls)  // Appends a default instance and one whose ino is 1.
+{
+ ls.push_back(new inode_t);  // first instance is left default-constructed
+ ls.push_back(new inode_t);
+ ls.back()->ino = 1;  // ino is the only field changed on the second instance
+ // i am lazy.
+}
+
+
+/*
+ * old_inode_t
+ */
+void old_inode_t::encode(bufferlist& bl) const  // Serializes first, inode and xattrs into bl.
+{
+ ENCODE_START(2, 2, bl);
+ ::encode(first, bl);  // order has to stay in step with decode()
+ ::encode(inode, bl);
+ ::encode(xattrs, bl);
+ ENCODE_FINISH(bl);
+}
+
+void old_inode_t::decode(bufferlist::iterator& bl)  // Reads the three fields back in encode() order.
+{
+ DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl);
+ ::decode(first, bl);
+ ::decode(inode, bl);
+ ::decode(xattrs, bl);
+ DECODE_FINISH(bl);
+}
+
+void old_inode_t::dump(Formatter *f) const  // Writes first, the inode fields and an "xattrs" object to f.
+{
+ f->dump_unsigned("first", first);
+ inode.dump(f);  // inode fields are emitted at this level; no section wraps them
+ f->open_object_section("xattrs");
+ for (map<string,bufferptr>::const_iterator p = xattrs.begin(); p != xattrs.end(); ++p) {
+ string v(p->second.c_str(), p->second.length());  // copy the value with an explicit length
+ f->dump_string(p->first.c_str(), v);  // the xattr name is used as the key
+ }
+ f->close_section();  // closes "xattrs"
+}
+
+void old_inode_t::generate_test_instances(list<old_inode_t*>& ls)  // Appends a default instance and a populated one.
+{
+ ls.push_back(new old_inode_t);  // first instance is left default-constructed
+ ls.push_back(new old_inode_t);
+ ls.back()->first = 2;  // every assignment below targets the second instance
+ list<inode_t*> ils;
+ inode_t::generate_test_instances(ils);
+ ls.back()->inode = *ils.back();  // copied by value. NOTE(review): the pointers left in ils are never deleted here
+ ls.back()->xattrs["user.foo"] = buffer::copy("asdf", 4);
+ ls.back()->xattrs["user.unprintable"] = buffer::copy("\000\001\002", 3);  // three bytes copied, the first being NUL
+}
+
+
+/*
+ * fnode_t
+ */
+void fnode_t::encode(bufferlist &bl) const  // Serializes version, snap_purged_thru and the four stat members into bl.
+{
+ ENCODE_START(2, 2, bl);
+ ::encode(version, bl);  // order has to stay in step with decode()
+ ::encode(snap_purged_thru, bl);
+ ::encode(fragstat, bl);
+ ::encode(accounted_fragstat, bl);
+ ::encode(rstat, bl);
+ ::encode(accounted_rstat, bl);
+ ENCODE_FINISH(bl);
+}
+
+void fnode_t::decode(bufferlist::iterator &bl)  // Reads the six fields back in encode() order.
+{
+ DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl);
+ ::decode(version, bl);
+ ::decode(snap_purged_thru, bl);
+ ::decode(fragstat, bl);
+ ::decode(accounted_fragstat, bl);
+ ::decode(rstat, bl);
+ ::decode(accounted_rstat, bl);
+ DECODE_FINISH(bl);
+}
+
+void fnode_t::dump(Formatter *f) const  // Writes the two scalars, then each stat member in its own object section.
+{
+ f->dump_unsigned("version", version);
+ f->dump_unsigned("snap_purged_thru", snap_purged_thru);
+
+ f->open_object_section("fragstat");
+ fragstat.dump(f);
+ f->close_section();
+
+ f->open_object_section("accounted_fragstat");
+ accounted_fragstat.dump(f);
+ f->close_section();
+
+ f->open_object_section("rstat");
+ rstat.dump(f);
+ f->close_section();
+
+ f->open_object_section("accounted_rstat");
+ accounted_rstat.dump(f);
+ f->close_section();
+}
+
+void fnode_t::generate_test_instances(list<fnode_t*>& ls)  // Appends a default instance and a populated one.
+{
+ ls.push_back(new fnode_t);  // first instance is left default-constructed
+ ls.push_back(new fnode_t);
+ ls.back()->version = 1;  // every assignment below targets the second instance
+ ls.back()->snap_purged_thru = 2;
+ list<frag_info_t*> fls;
+ frag_info_t::generate_test_instances(fls);
+ ls.back()->fragstat = *fls.back();  // copied by value. NOTE(review): the pointers left in fls are never deleted here
+ ls.back()->accounted_fragstat = *fls.front();
+ list<nest_info_t*> nls;
+ nest_info_t::generate_test_instances(nls);
+ ls.back()->rstat = *nls.front();  // copied by value. NOTE(review): the pointers left in nls are never deleted here
+ ls.back()->accounted_rstat = *nls.back();
+}
+
+
+/*
+ * old_rstat_t
+ */
+void old_rstat_t::encode(bufferlist& bl) const  // Serializes first, rstat and accounted_rstat into bl.
+{
+ ENCODE_START(2, 2, bl);
+ ::encode(first, bl);  // order has to stay in step with decode()
+ ::encode(rstat, bl);
+ ::encode(accounted_rstat, bl);
+ ENCODE_FINISH(bl);
+}
+
+void old_rstat_t::decode(bufferlist::iterator& bl)  // Reads the three fields back in encode() order.
+{
+ DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl);
+ ::decode(first, bl);
+ ::decode(rstat, bl);
+ ::decode(accounted_rstat, bl);
+ DECODE_FINISH(bl);
+}
+
+void old_rstat_t::dump(Formatter *f) const  // Writes first plus both nest_info_t members, each in its own object section.
+{
+ f->dump_unsigned("snapid", first);  // the member named first is emitted under the key "snapid"
+ f->open_object_section("rstat");
+ rstat.dump(f);
+ f->close_section();
+ f->open_object_section("accounted_rstat");
+ accounted_rstat.dump(f);
+ f->close_section();
+}
+
+void old_rstat_t::generate_test_instances(list<old_rstat_t*>& ls)  // Appends a default instance and a populated one.
+{
+ ls.push_back(new old_rstat_t());  // first instance is left default-constructed
+ ls.push_back(new old_rstat_t());
+ ls.back()->first = 12;  // every assignment below targets the second instance
+ list<nest_info_t*> nls;
+ nest_info_t::generate_test_instances(nls);
+ ls.back()->rstat = *nls.back();  // copied by value. NOTE(review): the pointers left in nls are never deleted here
+ ls.back()->accounted_rstat = *nls.front();
+}
+
+/*
+ * session_info_t
+ */
+void session_info_t::encode(bufferlist& bl) const  // Serializes inst, completed_requests, prealloc_inos and used_inos into bl.
+{
+ ENCODE_START(2, 2, bl);
+ ::encode(inst, bl);
+ ::encode(completed_requests, bl);
+ ::encode(prealloc_inos, bl); // hacky, see below.
+ ::encode(used_inos, bl);  // written as its own set here; decode() folds it into prealloc_inos
+ ENCODE_FINISH(bl);
+}
+
+void session_info_t::decode(bufferlist::iterator& p)  // Reads the four fields, then merges used_inos into prealloc_inos.
+{
+ DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, p);
+ ::decode(inst, p);
+ ::decode(completed_requests, p);
+ ::decode(prealloc_inos, p);
+ ::decode(used_inos, p);
+ prealloc_inos.insert(used_inos);  // the decoded used ranges become preallocated ranges
+ used_inos.clear();  // so used_inos is always empty once decode returns
+ DECODE_FINISH(p);
+}
+
+void session_info_t::dump(Formatter *f) const  // Writes inst plus three arrays (completed_requests, prealloc_inos, used_inos) to f.
+{
+ f->dump_stream("inst") << inst;
+
+ f->open_array_section("completed_requests");
+ for (set<tid_t>::const_iterator p = completed_requests.begin();
+ p != completed_requests.end();
+ ++p)
+ f->dump_unsigned("tid", *p);  // one "tid" entry per completed request
+ f->close_section();
+
+ f->open_array_section("prealloc_inos");
+ for (interval_set<inodeno_t>::const_iterator p = prealloc_inos.begin();
+ p != prealloc_inos.end();
+ ++p) {
+ f->open_object_section("ino_range");  // each interval is emitted as a start/length pair
+ f->dump_unsigned("start", p.get_start());
+ f->dump_unsigned("length", p.get_len());
+ f->close_section();
+ }
+ f->close_section();
+
+ f->open_array_section("used_inos");
+ for (interval_set<inodeno_t>::const_iterator p = used_inos.begin();  // walk used_inos itself so this array matches its label
+ p != used_inos.end();
+ ++p) {
+ f->open_object_section("ino_range");
+ f->dump_unsigned("start", p.get_start());
+ f->dump_unsigned("length", p.get_len());
+ f->close_section();
+ }
+ f->close_section();
+}
+
+void session_info_t::generate_test_instances(list<session_info_t*>& ls)  // Appends a default instance and a populated one.
+{
+ ls.push_back(new session_info_t);  // first instance is left default-constructed
+ ls.push_back(new session_info_t);
+ ls.back()->inst = entity_inst_t(entity_name_t::MDS(12), entity_addr_t());  // every assignment below targets the second instance
+ ls.back()->completed_requests.insert(234);
+ ls.back()->completed_requests.insert(237);
+ ls.back()->prealloc_inos.insert(333, 12);
+ ls.back()->prealloc_inos.insert(377, 112);
+ // we can't add used inos; they're cleared on decode
+}
+
+
+/*
+ * string_snap_t
+ */
+void string_snap_t::encode(bufferlist& bl) const  // Serializes name and snapid into bl.
+{
+ ENCODE_START(2, 2, bl);
+ ::encode(name, bl);  // order has to stay in step with decode()
+ ::encode(snapid, bl);
+ ENCODE_FINISH(bl);
+}
+
+void string_snap_t::decode(bufferlist::iterator& bl)  // Reads name and snapid back in encode() order.
+{
+ DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl);
+ ::decode(name, bl);
+ ::decode(snapid, bl);
+ DECODE_FINISH(bl);
+}
+
+void string_snap_t::dump(Formatter *f) const  // Writes name and snapid to f; no section is opened here.
+{
+ f->dump_string("name", name);
+ f->dump_unsigned("snapid", snapid);
+}
+
+void string_snap_t::generate_test_instances(list<string_snap_t*>& ls)  // Appends three instances: one default and two populated.
+{
+ ls.push_back(new string_snap_t);  // first instance is left default-constructed
+ ls.push_back(new string_snap_t);
+ ls.back()->name = "foo";  // second instance
+ ls.back()->snapid = 123;
+ ls.push_back(new string_snap_t);
+ ls.back()->name = "bar";  // third instance
+ ls.back()->snapid = 456;
+}
+
+
+/*
+ * MDSCacheObjectInfo
+ */
+void MDSCacheObjectInfo::encode(bufferlist& bl) const  // Serializes ino, dirfrag, dname and snapid into bl.
+{
+ ENCODE_START(2, 2, bl);
+ ::encode(ino, bl);  // order has to stay in step with decode()
+ ::encode(dirfrag, bl);
+ ::encode(dname, bl);
+ ::encode(snapid, bl);
+ ENCODE_FINISH(bl);
+}
+
+void MDSCacheObjectInfo::decode(bufferlist::iterator& p)  // Reads the four fields back in encode() order.
+{
+ DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, p);
+ ::decode(ino, p);
+ ::decode(dirfrag, p);
+ ::decode(dname, p);
+ ::decode(snapid, p);
+ DECODE_FINISH(p);
+}
+
+void MDSCacheObjectInfo::dump(Formatter *f) const  // Writes the four fields to f; no section is opened here.
+{
+ f->dump_unsigned("ino", ino);
+ f->dump_stream("dirfrag") << dirfrag;
+ f->dump_string("name", dname);  // the member dname is emitted under the key "name"
+ f->dump_unsigned("snapid", snapid);
+}
+
+void MDSCacheObjectInfo::generate_test_instances(list<MDSCacheObjectInfo*>& ls)  // Appends three instances: one default and two populated.
+{
+ ls.push_back(new MDSCacheObjectInfo);  // first instance is left default-constructed
+ ls.push_back(new MDSCacheObjectInfo);
+ ls.back()->ino = 1;  // second instance
+ ls.back()->dirfrag = dirfrag_t(2, 3);
+ ls.back()->dname = "fooname";
+ ls.back()->snapid = CEPH_NOSNAP;
+ ls.push_back(new MDSCacheObjectInfo);
+ ls.back()->ino = 121;  // third instance
+ ls.back()->dirfrag = dirfrag_t(222, 0);
+ ls.back()->dname = "bar foo";  // this name contains a space
+ ls.back()->snapid = 21322;
+}
+
+
+/*
+ * mds_table_pending_t
+ */
+void mds_table_pending_t::encode(bufferlist& bl) const  // Serializes reqid, mds and tid into bl.
+{
+ ENCODE_START(2, 2, bl);
+ ::encode(reqid, bl);  // order has to stay in step with decode()
+ ::encode(mds, bl);
+ ::encode(tid, bl);
+ ENCODE_FINISH(bl);
+}
+
+void mds_table_pending_t::decode(bufferlist::iterator& bl)  // Reads the three fields back in encode() order.
+{
+ DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl);
+ ::decode(reqid, bl);
+ ::decode(mds, bl);
+ ::decode(tid, bl);
+ DECODE_FINISH(bl);
+}
+
+void mds_table_pending_t::dump(Formatter *f) const  // Writes the three fields to f, all through dump_unsigned.
+{
+ f->dump_unsigned("reqid", reqid);
+ f->dump_unsigned("mds", mds);
+ f->dump_unsigned("tid", tid);
+}
+
+void mds_table_pending_t::generate_test_instances(list<mds_table_pending_t*>& ls)  // Appends a default instance and a populated one.
+{
+ ls.push_back(new mds_table_pending_t);  // first instance is left default-constructed
+ ls.push_back(new mds_table_pending_t);
+ ls.back()->reqid = 234;  // every assignment below targets the second instance
+ ls.back()->mds = 2;
+ ls.back()->tid = 35434;
+}
+
+
+/*
+ * inode_load_vec_t
+ */
+void inode_load_vec_t::encode(bufferlist &bl) const  // Serializes the first NUM entries of vec into bl.
+{
+ ENCODE_START(2, 2, bl);
+ for (int i=0; i<NUM; i++)  // no element count is written by this loop; decode() iterates NUM times as well
+ ::encode(vec[i], bl);
+ ENCODE_FINISH(bl);
+}
+
+void inode_load_vec_t::decode(const utime_t &t, bufferlist::iterator &p)  // Reads NUM entries of vec from p, passing t to each element decode.
+{
+ DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, p);
+ for (int i=0; i<NUM; i++)
+ ::decode(vec[i], t, p);  // three-argument decode overload: element, t, iterator
+ DECODE_FINISH(p);
+}
+
+void inode_load_vec_t::dump(Formatter *f)  // Writes every element of vec to f. NOTE(review): not const, unlike dirfrag_load_vec_t::dump
+{
+ f->open_array_section("Decay Counters");
+ for (vector<DecayCounter>::const_iterator i = vec.begin(); i != vec.end(); ++i) {
+ f->open_object_section("Decay Counter");  // one object per element
+ i->dump(f);
+ f->close_section();
+ }
+ f->close_section();  // closes "Decay Counters"
+}
+
+void inode_load_vec_t::generate_test_instances(list<inode_load_vec_t*>& ls)  // Appends one instance built from a default utime_t.
+{
+ utime_t sample;
+ ls.push_back(new inode_load_vec_t(sample));  // sample is passed to the constructor unmodified
+}
+
+
+/*
+ * dirfrag_load_vec_t
+ */
+void dirfrag_load_vec_t::dump(Formatter *f) const  // Writes every element of vec to f as an array of objects.
+{
+ f->open_array_section("Decay Counters");
+ for (vector<DecayCounter>::const_iterator i = vec.begin(); i != vec.end(); ++i) {
+ f->open_object_section("Decay Counter");  // one object per element
+ i->dump(f);
+ f->close_section();
+ }
+ f->close_section();  // closes "Decay Counters"
+}
+
+void dirfrag_load_vec_t::generate_test_instances(list<dirfrag_load_vec_t*>& ls)  // Appends one instance built from a default utime_t.
+{
+ utime_t sample;
+ ls.push_back(new dirfrag_load_vec_t(sample));  // sample is passed to the constructor unmodified
+}
+
+/*
+ * mds_load_t
+ */
+void mds_load_t::encode(bufferlist &bl) const {  // Serializes auth, all and the four scalar load figures into bl.
+ ENCODE_START(2, 2, bl);
+ ::encode(auth, bl);  // order has to stay in step with decode()
+ ::encode(all, bl);
+ ::encode(req_rate, bl);
+ ::encode(cache_hit_rate, bl);
+ ::encode(queue_len, bl);
+ ::encode(cpu_load_avg, bl);
+ ENCODE_FINISH(bl);
+}
+
+void mds_load_t::decode(const utime_t &t, bufferlist::iterator &bl) {  // Reads the six fields back in encode() order.
+ DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl);
+ ::decode(auth, t, bl);  // only auth and all are decoded with t
+ ::decode(all, t, bl);
+ ::decode(req_rate, bl);
+ ::decode(cache_hit_rate, bl);
+ ::decode(queue_len, bl);
+ ::decode(cpu_load_avg, bl);
+ DECODE_FINISH(bl);
+}
+
+void mds_load_t::dump(Formatter *f) const  // Writes the four scalars first, then auth and all in their own object sections.
+{
+ f->dump_float("request rate", req_rate);  // output keys contain spaces
+ f->dump_float("cache hit rate", cache_hit_rate);
+ f->dump_float("queue length", queue_len);
+ f->dump_float("cpu load", cpu_load_avg);
+ f->open_object_section("auth dirfrag");
+ auth.dump(f);
+ f->close_section();
+ f->open_object_section("all dirfrags");
+ all.dump(f);
+ f->close_section();
+}
+
+void mds_load_t::generate_test_instances(list<mds_load_t*>& ls)  // Appends one instance built from a default utime_t.
+{
+ utime_t sample;
+ ls.push_back(new mds_load_t(sample));  // sample is passed to the constructor unmodified
+}
+
+/*
+ * cap_reconnect_t
+ */
+void cap_reconnect_t::encode(bufferlist& bl) const {  // Wraps the encode_old() payload in an ENCODE_START/ENCODE_FINISH envelope.
+ ENCODE_START(1, 1, bl);
+ encode_old(bl); // extract out when something changes
+ ENCODE_FINISH(bl);
+}
+
+void cap_reconnect_t::encode_old(bufferlist& bl) const {  // Writes path, capinfo and the raw flockbl bytes, with no version envelope.
+ ::encode(path, bl);
+ capinfo.flock_len = flockbl.length();  // NOTE(review): written from a const method; presumably capinfo is declared mutable, confirm in the header
+ ::encode(capinfo, bl);
+ ::encode_nohead(flockbl, bl);  // decode_old() sizes this read from capinfo.flock_len
+}
+
+void cap_reconnect_t::decode(bufferlist::iterator& bl) {  // Reads the envelope written by encode(), then delegates to decode_old().
+ DECODE_START(1, bl);  // plain DECODE_START here, not the LEGACY_COMPAT_LEN variant used elsewhere in this file
+ decode_old(bl); // extract out when something changes
+ DECODE_FINISH(bl);
+}
+
+void cap_reconnect_t::decode_old(bufferlist::iterator& bl) {  // Reads path, capinfo and flockbl, with no version envelope.
+ ::decode(path, bl);
+ ::decode(capinfo, bl);  // has to come first: flock_len read here is passed to the next call
+ ::decode_nohead(capinfo.flock_len, flockbl, bl);
+}
+
+void cap_reconnect_t::dump(Formatter *f) const  // Writes path and selected capinfo fields to f; flockbl contents are not dumped.
+{
+ f->dump_string("path", path);
+ f->dump_int("cap_id", capinfo.cap_id);
+ f->dump_string("cap wanted", ccap_string(capinfo.wanted));  // wanted and issued are rendered through ccap_string
+ f->dump_string("cap issued", ccap_string(capinfo.issued));
+ f->dump_int("snaprealm", capinfo.snaprealm);
+ f->dump_int("path base ino", capinfo.pathbase);
+ f->dump_string("has file locks", capinfo.flock_len ? "true" : "false");  // only whether flock_len is non-zero is reported
+}
+
+void cap_reconnect_t::generate_test_instances(list<cap_reconnect_t*>& ls)  // Appends a single populated instance (no default-only one).
+{
+ ls.push_back(new cap_reconnect_t);
+ ls.back()->path = "/test/path";
+ ls.back()->capinfo.cap_id = 1;  // path and cap_id are the only fields set here
+}
diff --git a/src/mds/mdstypes.h b/src/mds/mdstypes.h
index 74b8571e0f3..7b25d3a5c27 100644
--- a/src/mds/mdstypes.h
+++ b/src/mds/mdstypes.h
@@ -17,6 +17,7 @@ using namespace std;
#include "include/frag.h"
#include "include/xlist.h"
+#include "include/interval_set.h"
#include "inode_backtrace.h"
@@ -107,6 +108,25 @@ inline string ccap_string(int cap)
}
+/**
+ * Default file layout stuff. This lets us set a default file layout on
+ * a directory inode that all files in its tree will use on creation.
+ */
+struct file_layout_policy_t {  // Wraps a ceph_file_layout and gives it encode/decode/dump support.
+ ceph_file_layout layout;
+
+ file_layout_policy_t() {
+ memset(&layout, 0, sizeof(layout));  // start from an all-zero layout
+ }
+
+ void encode(bufferlist &bl) const;
+ void decode(bufferlist::iterator& bl);
+ void dump(Formatter *f) const;
+ static void generate_test_instances(list<file_layout_policy_t*>& ls);  // appends sample instances to ls
+};
+WRITE_CLASS_ENCODER(file_layout_policy_t)
+
+
struct scatter_info_t {
version_t version;
@@ -144,24 +164,10 @@ struct frag_info_t : public scatter_info_t {
nsubdirs += other.nsubdirs;
}
- void encode(bufferlist &bl) const {
- __u8 v = 1;
- ::encode(v, bl);
-
- ::encode(version, bl);
- ::encode(mtime, bl);
- ::encode(nfiles, bl);
- ::encode(nsubdirs, bl);
- }
- void decode(bufferlist::iterator &bl) {
- __u8 v;
- ::decode(v, bl);
-
- ::decode(version, bl);
- ::decode(mtime, bl);
- ::decode(nfiles, bl);
- ::decode(nsubdirs, bl);
- }
+ void encode(bufferlist &bl) const;
+ void decode(bufferlist::iterator& bl);
+ void dump(Formatter *f) const;
+ static void generate_test_instances(list<frag_info_t*>& ls);
};
WRITE_CLASS_ENCODER(frag_info_t)
@@ -169,17 +175,8 @@ inline bool operator==(const frag_info_t &l, const frag_info_t &r) {
return memcmp(&l, &r, sizeof(l)) == 0;
}
-inline ostream& operator<<(ostream &out, const frag_info_t &f) {
- if (f == frag_info_t())
- return out << "f()";
- out << "f(v" << f.version;
- if (f.mtime != utime_t())
- out << " m" << f.mtime;
- if (f.nfiles || f.nsubdirs)
- out << " " << f.size() << "=" << f.nfiles << "+" << f.nsubdirs;
- out << ")";
- return out;
-}
+ostream& operator<<(ostream &out, const frag_info_t &f);
+
struct nest_info_t : public scatter_info_t {
// this frag + children
@@ -223,30 +220,10 @@ struct nest_info_t : public scatter_info_t {
rsnaprealms += cur.rsnaprealms - acc.rsnaprealms;
}
- void encode(bufferlist &bl) const {
- __u8 v = 1;
- ::encode(v, bl);
-
- ::encode(version, bl);
- ::encode(rbytes, bl);
- ::encode(rfiles, bl);
- ::encode(rsubdirs, bl);
- ::encode(ranchors, bl);
- ::encode(rsnaprealms, bl);
- ::encode(rctime, bl);
- }
- void decode(bufferlist::iterator &bl) {
- __u8 v;
- ::decode(v, bl);
-
- ::decode(version, bl);
- ::decode(rbytes, bl);
- ::decode(rfiles, bl);
- ::decode(rsubdirs, bl);
- ::decode(ranchors, bl);
- ::decode(rsnaprealms, bl);
- ::decode(rctime, bl);
- }
+ void encode(bufferlist &bl) const;
+ void decode(bufferlist::iterator& bl);
+ void dump(Formatter *f) const;
+ static void generate_test_instances(list<nest_info_t*>& ls);
};
WRITE_CLASS_ENCODER(nest_info_t)
@@ -254,23 +231,8 @@ inline bool operator==(const nest_info_t &l, const nest_info_t &r) {
return memcmp(&l, &r, sizeof(l)) == 0;
}
-inline ostream& operator<<(ostream &out, const nest_info_t &n) {
- if (n == nest_info_t())
- return out << "n()";
- out << "n(v" << n.version;
- if (n.rctime != utime_t())
- out << " rc" << n.rctime;
- if (n.rbytes)
- out << " b" << n.rbytes;
- if (n.ranchors)
- out << " a" << n.ranchors;
- if (n.rsnaprealms)
- out << " sr" << n.rsnaprealms;
- if (n.rfiles || n.rsubdirs)
- out << " " << n.rsize() << "=" << n.rfiles << "+" << n.rsubdirs;
- out << ")";
- return out;
-}
+ostream& operator<<(ostream &out, const nest_info_t &n);
+
struct vinodeno_t {
inodeno_t ino;
@@ -321,59 +283,45 @@ inline ostream& operator<<(ostream &out, const vinodeno_t &vino) {
}
-struct byte_range_t {
- uint64_t first, last; // interval client can write to
+/*
+ * client_writeable_range_t
+ */
+struct client_writeable_range_t {  // A byte range plus the snapid it follows; byte_range_t is nested inside this struct.
+ struct byte_range_t {
+ uint64_t first, last; // interval client can write to
+ byte_range_t() : first(0), last(0) {}
+ };
- byte_range_t() : first(0), last(0) {}
+ byte_range_t range;
+ snapid_t follows; // aka "data+metadata flushed thru"
- void encode(bufferlist &bl) const {
- ::encode(first, bl);
- ::encode(last, bl);
- }
- void decode(bufferlist::iterator& bl) {
- ::decode(first, bl);
- ::decode(last, bl);
- }
+ client_writeable_range_t() : follows(0) {}  // range zeroes itself through byte_range_t's constructor
+
+ void encode(bufferlist &bl) const;
+ void decode(bufferlist::iterator& bl);
+ void dump(Formatter *f) const;
+ static void generate_test_instances(list<client_writeable_range_t*>& ls);  // appends sample instances to ls
};
-WRITE_CLASS_ENCODER(byte_range_t)
-inline ostream& operator<<(ostream& out, const byte_range_t& r)
-{
- return out << r.first << '-' << r.last;
-}
-inline bool operator==(const byte_range_t& l, const byte_range_t& r) {
- return l.first == r.first && l.last == r.last;
+inline void decode(client_writeable_range_t::byte_range_t& range, bufferlist::iterator& bl) {  // Reads a bare byte_range_t: first, then last, with no version envelope.
+ ::decode(range.first, bl);
+ ::decode(range.last, bl);
}
-
-struct client_writeable_range_t {
- byte_range_t range;
- snapid_t follows; // aka "data+metadata flushed thru"
-
- void encode(bufferlist &bl) const {
- __u8 v = 1;
- ::encode(v, bl);
- ::encode(range, bl);
- ::encode(follows, bl);
- }
- void decode(bufferlist::iterator& bl) {
- __u8 v;
- ::decode(v, bl);
- ::decode(range, bl);
- ::decode(follows, bl);
- }
-};
WRITE_CLASS_ENCODER(client_writeable_range_t)
-inline ostream& operator<<(ostream& out, const client_writeable_range_t& r)
-{
- return out << r.range << "@" << r.follows;
-}
-inline bool operator==(const client_writeable_range_t& l, const client_writeable_range_t& r) {
- return l.range == r.range && l.follows == r.follows;
+ostream& operator<<(ostream& out, const client_writeable_range_t& r);
+
+inline bool operator==(const client_writeable_range_t& l,  // Equal when range.first, range.last and follows all match.
+ const client_writeable_range_t& r) {
+ return l.range.first == r.range.first && l.range.last == r.range.last &&  // range members are compared directly, field by field
+ l.follows == r.follows;
}
+/*
+ * inode_t
+ */
struct inode_t {
// base (immutable)
inodeno_t ino;
@@ -479,115 +427,26 @@ struct inode_t {
}
}
- void encode(bufferlist &bl) const {
- __u8 v = 5;
- ::encode(v, bl);
-
- ::encode(ino, bl);
- ::encode(rdev, bl);
- ::encode(ctime, bl);
-
- ::encode(mode, bl);
- ::encode(uid, bl);
- ::encode(gid, bl);
-
- ::encode(nlink, bl);
- ::encode(anchored, bl);
-
- ::encode(dir_layout, bl);
- ::encode(layout, bl);
- ::encode(size, bl);
- ::encode(truncate_seq, bl);
- ::encode(truncate_size, bl);
- ::encode(truncate_from, bl);
- ::encode(truncate_pending, bl);
- ::encode(mtime, bl);
- ::encode(atime, bl);
- ::encode(time_warp_seq, bl);
- ::encode(client_ranges, bl);
-
- ::encode(dirstat, bl);
- ::encode(rstat, bl);
- ::encode(accounted_rstat, bl);
-
- ::encode(version, bl);
- ::encode(file_data_version, bl);
- ::encode(xattr_version, bl);
- ::encode(last_renamed_version, bl);
- }
- void decode(bufferlist::iterator &p) {
- __u8 v;
- ::decode(v, p);
-
- ::decode(ino, p);
- ::decode(rdev, p);
- ::decode(ctime, p);
-
- ::decode(mode, p);
- ::decode(uid, p);
- ::decode(gid, p);
-
- ::decode(nlink, p);
- ::decode(anchored, p);
-
- if (v >= 4)
- ::decode(dir_layout, p);
- else
- memset(&dir_layout, 0, sizeof(dir_layout));
- ::decode(layout, p);
- ::decode(size, p);
- ::decode(truncate_seq, p);
- ::decode(truncate_size, p);
- ::decode(truncate_from, p);
- if (v >= 5)
- ::decode(truncate_pending, p);
- else
- truncate_pending = 0;
- ::decode(mtime, p);
- ::decode(atime, p);
- ::decode(time_warp_seq, p);
- if (v >= 3) {
- ::decode(client_ranges, p);
- } else {
- map<client_t, byte_range_t> m;
- ::decode(m, p);
- for (map<client_t, byte_range_t>::iterator q = m.begin(); q != m.end(); q++)
- client_ranges[q->first].range = q->second;
- }
-
- ::decode(dirstat, p);
- ::decode(rstat, p);
- ::decode(accounted_rstat, p);
-
- ::decode(version, p);
- ::decode(file_data_version, p);
- ::decode(xattr_version, p);
- if (v >= 2)
- ::decode(last_renamed_version, p);
- }
+ void encode(bufferlist &bl) const;
+ void decode(bufferlist::iterator& bl);
+ void dump(Formatter *f) const;
+ static void generate_test_instances(list<inode_t*>& ls);
};
WRITE_CLASS_ENCODER(inode_t)
+/*
+ * old_inode_t
+ */
struct old_inode_t {
snapid_t first;
inode_t inode;
map<string,bufferptr> xattrs;
- void encode(bufferlist& bl) const {
- __u8 struct_v = 1;
- ::encode(struct_v, bl);
- ::encode(first, bl);
- ::encode(inode, bl);
- ::encode(xattrs, bl);
- }
- void decode(bufferlist::iterator& bl) {
- __u8 struct_v;
- ::decode(struct_v, bl);
- ::decode(first, bl);
- ::decode(inode, bl);
- ::decode(xattrs, bl);
- }
+ void encode(bufferlist &bl) const;
+ void decode(bufferlist::iterator& bl);
+ void dump(Formatter *f) const;
+ static void generate_test_instances(list<old_inode_t*>& ls);
};
WRITE_CLASS_ENCODER(old_inode_t)
@@ -601,26 +460,10 @@ struct fnode_t {
frag_info_t fragstat, accounted_fragstat;
nest_info_t rstat, accounted_rstat;
- void encode(bufferlist &bl) const {
- __u8 v = 1;
- ::encode(v, bl);
- ::encode(version, bl);
- ::encode(snap_purged_thru, bl);
- ::encode(fragstat, bl);
- ::encode(accounted_fragstat, bl);
- ::encode(rstat, bl);
- ::encode(accounted_rstat, bl);
- }
- void decode(bufferlist::iterator &bl) {
- __u8 v;
- ::decode(v, bl);
- ::decode(version, bl);
- ::decode(snap_purged_thru, bl);
- ::decode(fragstat, bl);
- ::decode(accounted_fragstat, bl);
- ::decode(rstat, bl);
- ::decode(accounted_rstat, bl);
- }
+ void encode(bufferlist &bl) const;
+ void decode(bufferlist::iterator& bl);
+ void dump(Formatter *f) const;
+ static void generate_test_instances(list<fnode_t*>& ls);
};
WRITE_CLASS_ENCODER(fnode_t)
@@ -629,20 +472,10 @@ struct old_rstat_t {
snapid_t first;
nest_info_t rstat, accounted_rstat;
- void encode(bufferlist& bl) const {
- __u8 struct_v = 1;
- ::encode(struct_v, bl);
- ::encode(first, bl);
- ::encode(rstat, bl);
- ::encode(accounted_rstat, bl);
- }
- void decode(bufferlist::iterator& bl) {
- __u8 struct_v;
- ::decode(struct_v, bl);
- ::decode(first, bl);
- ::decode(rstat, bl);
- ::decode(accounted_rstat, bl);
- }
+ void encode(bufferlist& bl) const;
+ void decode(bufferlist::iterator& p);
+ void dump(Formatter *f) const;
+ static void generate_test_instances(list<old_rstat_t*>& ls);
};
WRITE_CLASS_ENCODER(old_rstat_t)
@@ -651,6 +484,31 @@ inline ostream& operator<<(ostream& out, const old_rstat_t& o) {
}
+/*
+ * session_info_t
+ */
+
+// Groups an entity_inst_t with a set of completed request tids and two
+// inode-number interval sets.  The encode/decode/dump/generate_test_instances
+// members are only declared here.
+struct session_info_t {
+ entity_inst_t inst;
+ set<tid_t> completed_requests;
+ interval_set<inodeno_t> prealloc_inos; // preallocated, ready to use.
+ interval_set<inodeno_t> used_inos; // journaling use
+
+ // Builds a client_t from the numeric part of inst's entity name.
+ client_t get_client() const { return client_t(inst.name.num()); }
+
+ // Empties both inode interval sets and the completed-request set;
+ // inst is left untouched.
+ void clear_meta() {
+ prealloc_inos.clear();
+ used_inos.clear();
+ completed_requests.clear();
+ }
+
+ void encode(bufferlist& bl) const;
+ void decode(bufferlist::iterator& p);
+ void dump(Formatter *f) const;
+ static void generate_test_instances(list<session_info_t*>& ls);
+};
+WRITE_CLASS_ENCODER(session_info_t)
+
// =======
// dentries
@@ -729,18 +587,11 @@ struct string_snap_t {
string_snap_t() {}
string_snap_t(const string& n, snapid_t s) : name(n), snapid(s) {}
string_snap_t(const char *n, snapid_t s) : name(n), snapid(s) {}
- void encode(bufferlist& bl) const {
- __u8 struct_v = 1;
- ::encode(struct_v, bl);
- ::encode(name, bl);
- ::encode(snapid, bl);
- }
- void decode(bufferlist::iterator& bl) {
- __u8 struct_v = 1;
- ::decode(struct_v, bl);
- ::decode(name, bl);
- ::decode(snapid, bl);
- }
+
+ void encode(bufferlist& bl) const;
+ void decode(bufferlist::iterator& p);
+ void dump(Formatter *f) const;
+ static void generate_test_instances(list<string_snap_t*>& ls);
};
WRITE_CLASS_ENCODER(string_snap_t)
@@ -754,6 +605,23 @@ inline ostream& operator<<(ostream& out, const string_snap_t &k)
return out << "(" << k.name << "," << k.snapid << ")";
}
+/*
+ * mds_table_pending_t
+ *
+ * mds's requesting any pending ops. child needs to encode the corresponding
+ * pending mutation state in the table.
+ */
+struct mds_table_pending_t {
+ // NOTE(review): field meanings are inferred from their names only
+ // (request id, mds rank, table transaction version) -- confirm against users.
+ uint64_t reqid;
+ __s32 mds;
+ version_t tid;
+ // Default construction zeroes all three fields.
+ mds_table_pending_t() : reqid(0), mds(0), tid(0) {}
+ // Declared only; the definitions are not part of this struct body.
+ void encode(bufferlist& bl) const;
+ void decode(bufferlist::iterator& bl);
+ void dump(Formatter *f) const;
+ static void generate_test_instances(list<mds_table_pending_t*>& ls);
+};
+WRITE_CLASS_ENCODER(mds_table_pending_t)
// =========
@@ -824,18 +692,13 @@ struct cap_reconnect_t {
capinfo.pathbase = pino;
capinfo.flock_len = 0;
}
+ void encode(bufferlist& bl) const;
+ void decode(bufferlist::iterator& bl);
+ void encode_old(bufferlist& bl) const;
+ void decode_old(bufferlist::iterator& bl);
- void encode(bufferlist& bl) const {
- ::encode(path, bl);
- capinfo.flock_len = flockbl.length();
- ::encode(capinfo, bl);
- ::encode_nohead(flockbl, bl);
- }
- void decode(bufferlist::iterator& bl) {
- ::decode(path, bl);
- ::decode(capinfo, bl);
- ::decode_nohead(capinfo.flock_len, flockbl, bl);
- }
+ void dump(Formatter *f) const;
+ static void generate_test_instances(list<cap_reconnect_t*>& ls);
};
WRITE_CLASS_ENCODER(cap_reconnect_t)
@@ -951,8 +814,11 @@ class inode_load_vec_t {
public:
inode_load_vec_t(const utime_t &now)
: vec(NUM, DecayCounter(now))
- {
- }
+ {}
+ // for dencoder infrastructure
+ inode_load_vec_t() :
+ vec(NUM, DecayCounter())
+ {}
DecayCounter &get(int t) {
assert(t < NUM);
return vec[t];
@@ -961,18 +827,12 @@ public:
for (int i=0; i<NUM; i++)
vec[i].reset(now);
}
- void encode(bufferlist &bl) const {
- __u8 struct_v = 1;
- ::encode(struct_v, bl);
- for (int i=0; i<NUM; i++)
- ::encode(vec[i], bl);
- }
- void decode(const utime_t &t, bufferlist::iterator &p) {
- __u8 struct_v;
- ::decode(struct_v, p);
- for (int i=0; i<NUM; i++)
- ::decode(vec[i], t, p);
- }
+ void encode(bufferlist &bl) const;
+ void decode(const utime_t &t, bufferlist::iterator &p);
+ // for dencoder
+ void decode(bufferlist::iterator& p) { utime_t sample; decode(sample, p); }
+ void dump(Formatter *f);
+ static void generate_test_instances(list<inode_load_vec_t*>& ls);
};
inline void encode(const inode_load_vec_t &c, bufferlist &bl) { c.encode(bl); }
inline void decode(inode_load_vec_t & c, const utime_t &t, bufferlist::iterator &p) {
@@ -985,20 +845,30 @@ public:
std::vector < DecayCounter > vec;
dirfrag_load_vec_t(const utime_t &now)
: vec(NUM, DecayCounter(now))
- {
- }
+ { }
+ // for dencoder infrastructure
+ dirfrag_load_vec_t()
+ : vec(NUM, DecayCounter())
+ {}
void encode(bufferlist &bl) const {
- __u8 struct_v = 1;
- ::encode(struct_v, bl);
+ ENCODE_START(2, 2, bl);
for (int i=0; i<NUM; i++)
::encode(vec[i], bl);
+ ENCODE_FINISH(bl);
}
void decode(const utime_t &t, bufferlist::iterator &p) {
- __u8 struct_v;
- ::decode(struct_v, p);
+ DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, p);
for (int i=0; i<NUM; i++)
::decode(vec[i], t, p);
+ DECODE_FINISH(p);
}
+ // for dencoder infrastructure
+ void decode(bufferlist::iterator& p) {
+ utime_t sample;
+ decode(sample, p);
+ }
+ void dump(Formatter *f) const;
+ static void generate_test_instances(list<dirfrag_load_vec_t*>& ls);
DecayCounter &get(int t) {
assert(t < NUM);
@@ -1080,31 +950,20 @@ struct mds_load_t {
mds_load_t(const utime_t &t) :
auth(t), all(t), req_rate(0), cache_hit_rate(0),
queue_len(0), cpu_load_avg(0)
- {
- }
+ {}
+ // mostly for the dencoder infrastructure
+ mds_load_t() :
+ auth(), all(),
+ req_rate(0), cache_hit_rate(0), queue_len(0), cpu_load_avg(0)
+ {}
double mds_load(); // defiend in MDBalancer.cc
-
- void encode(bufferlist &bl) const {
- __u8 struct_v = 1;
- ::encode(struct_v, bl);
- ::encode(auth, bl);
- ::encode(all, bl);
- ::encode(req_rate, bl);
- ::encode(cache_hit_rate, bl);
- ::encode(queue_len, bl);
- ::encode(cpu_load_avg, bl);
- }
- void decode(const utime_t &t, bufferlist::iterator &bl) {
- __u8 struct_v;
- ::decode(struct_v, bl);
- ::decode(auth, t, bl);
- ::decode(all, t, bl);
- ::decode(req_rate, bl);
- ::decode(cache_hit_rate, bl);
- ::decode(queue_len, bl);
- ::decode(cpu_load_avg, bl);
- }
+ void encode(bufferlist& bl) const;
+ void decode(const utime_t& now, bufferlist::iterator& bl);
+ //this one is for dencoder infrastructure
+ void decode(bufferlist::iterator& bl) { utime_t sample; decode(sample, bl); }
+ void dump(Formatter *f) const;
+ static void generate_test_instances(list<mds_load_t*>& ls);
};
inline void encode(const mds_load_t &c, bufferlist &bl) { c.encode(bl); }
inline void decode(mds_load_t &c, const utime_t &t, bufferlist::iterator &p) {
@@ -1121,26 +980,6 @@ inline ostream& operator<<( ostream& out, mds_load_t& load )
<< ">";
}
-/*
-inline mds_load_t& operator+=( mds_load_t& l, mds_load_t& r )
-{
- l.root_pop += r.root_pop;
- l.req_rate += r.req_rate;
- l.queue_len += r.queue_len;
- return l;
-}
-
-inline mds_load_t operator/( mds_load_t& a, double d )
-{
- mds_load_t r;
- r.root_pop = a.root_pop / d;
- r.req_rate = a.req_rate / d;
- r.queue_len = a.queue_len / d;
- return r;
-}
-*/
-
-
class load_spread_t {
public:
static const int MAX = 4;
@@ -1234,22 +1073,10 @@ public:
MDSCacheObjectInfo() : ino(0) {}
- void encode(bufferlist& bl) const {
- __u8 struct_v = 1;
- ::encode(struct_v, bl);
- ::encode(ino, bl);
- ::encode(dirfrag, bl);
- ::encode(dname, bl);
- ::encode(snapid, bl);
- }
- void decode(bufferlist::iterator& p) {
- __u8 struct_v;
- ::decode(struct_v, p);
- ::decode(ino, p);
- ::decode(dirfrag, p);
- ::decode(dname, p);
- ::decode(snapid, p);
- }
+ void encode(bufferlist& bl) const;
+ void decode(bufferlist::iterator& bl);
+ void dump(Formatter *f) const;
+ static void generate_test_instances(list<MDSCacheObjectInfo*>& ls);
};
inline bool operator==(const MDSCacheObjectInfo& l, const MDSCacheObjectInfo& r) {
diff --git a/src/mds/snap.cc b/src/mds/snap.cc
index fa434b79d02..06dc95590c9 100644
--- a/src/mds/snap.cc
+++ b/src/mds/snap.cc
@@ -13,253 +13,57 @@
*/
#include "snap.h"
-#include "MDCache.h"
-#include "MDS.h"
-#include "messages/MClientSnap.h"
+#include "common/Formatter.h"
/*
- * SnapRealm
+ * SnapInfo
*/
-#define dout_subsys ceph_subsys_mds
-#undef dout_prefix
-#define dout_prefix _prefix(_dout, mdcache->mds->get_nodeid(), inode, srnode.seq, this)
-static ostream& _prefix(std::ostream *_dout, int whoami, CInode *inode,
- uint64_t seq, SnapRealm *realm) {
- return *_dout << " mds." << whoami
- << ".cache.snaprealm(" << inode->ino()
- << " seq " << seq << " " << realm << ") ";
-}
-
-ostream& operator<<(ostream& out, const SnapRealm& realm)
+void SnapInfo::encode(bufferlist& bl) const
{
- out << "snaprealm(" << realm.inode->ino()
- << " seq " << realm.srnode.seq
- << " lc " << realm.srnode.last_created
- << " cr " << realm.srnode.created;
- if (realm.srnode.created != realm.srnode.current_parent_since)
- out << " cps " << realm.srnode.current_parent_since;
- out << " snaps=" << realm.srnode.snaps;
- if (realm.srnode.past_parents.size()) {
- out << " past_parents=(";
- for (map<snapid_t, snaplink_t>::const_iterator p = realm.srnode.past_parents.begin();
- p != realm.srnode.past_parents.end();
- p++) {
- if (p != realm.srnode.past_parents.begin()) out << ",";
- out << p->second.first << "-" << p->first
- << "=" << p->second.ino;
- }
- out << ")";
- }
- out << " " << &realm << ")";
- return out;
+ ENCODE_START(2, 2, bl);
+ ::encode(snapid, bl);
+ ::encode(ino, bl);
+ ::encode(stamp, bl);
+ ::encode(name, bl);
+ ENCODE_FINISH(bl);
}
-
-
-
-void SnapRealm::add_open_past_parent(SnapRealm *parent)
+void SnapInfo::decode(bufferlist::iterator& bl)
{
- open_past_parents[parent->inode->ino()] = parent;
- parent->inode->get(CInode::PIN_PASTSNAPPARENT);
+ DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl);
+ ::decode(snapid, bl);
+ ::decode(ino, bl);
+ ::decode(stamp, bl);
+ ::decode(name, bl);
+ DECODE_FINISH(bl);
}
-bool SnapRealm::_open_parents(Context *finish, snapid_t first, snapid_t last)
+void SnapInfo::dump(Formatter *f) const
{
- dout(10) << "open_parents [" << first << "," << last << "]" << dendl;
- if (open)
- return true;
-
- // make sure my current parents' parents are open...
- if (parent) {
- dout(10) << " current parent [" << srnode.current_parent_since << ",head] is " << *parent
- << " on " << *parent->inode << dendl;
- if (last >= srnode.current_parent_since &&
- !parent->_open_parents(finish, MAX(first, srnode.current_parent_since), last))
- return false;
- }
-
- // and my past parents too!
- assert(srnode.past_parents.size() >= open_past_parents.size());
- if (srnode.past_parents.size() > open_past_parents.size()) {
- for (map<snapid_t, snaplink_t>::iterator p = srnode.past_parents.begin();
- p != srnode.past_parents.end();
- p++) {
- dout(10) << " past_parent [" << p->second.first << "," << p->first << "] is "
- << p->second.ino << dendl;
- CInode *parent = mdcache->get_inode(p->second.ino);
- if (!parent) {
- mdcache->open_remote_ino(p->second.ino, finish);
- return false;
- }
- assert(parent->snaprealm); // hmm!
- if (!open_past_parents.count(p->second.ino)) {
- add_open_past_parent(parent->snaprealm);
- }
- if (!parent->snaprealm->_open_parents(finish, p->second.first, p->first))
- return false;
- }
- }
-
- open = true;
- return true;
+ f->dump_unsigned("snapid", snapid);
+ f->dump_unsigned("ino", ino);
+ f->dump_stream("stamp") << stamp;
+ f->dump_string("name", name);
}
-bool SnapRealm::have_past_parents_open(snapid_t first, snapid_t last)
+void SnapInfo::generate_test_instances(list<SnapInfo*>& ls)
{
- dout(10) << "have_past_parents_open [" << first << "," << last << "]" << dendl;
- if (open)
- return true;
-
- for (map<snapid_t, snaplink_t>::iterator p = srnode.past_parents.lower_bound(first);
- p != srnode.past_parents.end();
- p++) {
- if (p->second.first > last)
- break;
- dout(10) << " past parent [" << p->second.first << "," << p->first << "] was "
- << p->second.ino << dendl;
- if (open_past_parents.count(p->second.ino) == 0) {
- dout(10) << " past parent " << p->second.ino << " is not open" << dendl;
- return false;
- }
- if (!open_past_parents[p->second.ino]->have_past_parents_open(MAX(first, p->second.first),
- MIN(last, p->first)))
- return false;
- }
-
- open = true;
- return true;
+ ls.push_back(new SnapInfo);
+ ls.push_back(new SnapInfo);
+ ls.back()->snapid = 1;
+ ls.back()->ino = 2;
+ ls.back()->stamp = utime_t(3, 4);
+ ls.back()->name = "foo";
}
-void SnapRealm::close_parents()
+ostream& operator<<(ostream& out, const SnapInfo &sn)
{
- for (map<inodeno_t,SnapRealm*>::iterator p = open_past_parents.begin();
- p != open_past_parents.end();
- p++)
- p->second->inode->put(CInode::PIN_PASTSNAPPARENT);
- open_past_parents.clear();
-}
-
-
-/*
- * get list of snaps for this realm. we must include parents' snaps
- * for the intervals during which they were our parent.
- */
-void SnapRealm::build_snap_set(set<snapid_t> &s,
- snapid_t& max_seq, snapid_t& max_last_created, snapid_t& max_last_destroyed,
- snapid_t first, snapid_t last)
-{
- dout(10) << "build_snap_set [" << first << "," << last << "] on " << *this << dendl;
-
- if (srnode.seq > max_seq)
- max_seq = srnode.seq;
- if (srnode.last_created > max_last_created)
- max_last_created = srnode.last_created;
- if (srnode.last_destroyed > max_last_destroyed)
- max_last_destroyed = srnode.last_destroyed;
-
- // include my snaps within interval [first,last]
- for (map<snapid_t, SnapInfo>::iterator p = srnode.snaps.lower_bound(first); // first element >= first
- p != srnode.snaps.end() && p->first <= last;
- p++)
- s.insert(p->first);
-
- // include snaps for parents during intervals that intersect [first,last]
- for (map<snapid_t, snaplink_t>::iterator p = srnode.past_parents.lower_bound(first);
- p != srnode.past_parents.end() && p->first >= first && p->second.first <= last;
- p++) {
- CInode *oldparent = mdcache->get_inode(p->second.ino);
- assert(oldparent); // call open_parents first!
- assert(oldparent->snaprealm);
- oldparent->snaprealm->build_snap_set(s, max_seq, max_last_created, max_last_destroyed,
- MAX(first, p->second.first),
- MIN(last, p->first));
- }
- if (srnode.current_parent_since <= last && parent)
- parent->build_snap_set(s, max_seq, max_last_created, max_last_destroyed,
- MAX(first, srnode.current_parent_since), last);
-}
-
-
-void SnapRealm::check_cache()
-{
- if (cached_seq >= srnode.seq)
- return;
-
- cached_snaps.clear();
- cached_snap_context.clear();
-
- cached_last_created = srnode.last_created;
- cached_last_destroyed = srnode.last_destroyed;
- cached_seq = srnode.seq;
- build_snap_set(cached_snaps, cached_seq, cached_last_created, cached_last_destroyed,
- 0, CEPH_NOSNAP);
-
- cached_snap_trace.clear();
- build_snap_trace(cached_snap_trace);
-
- dout(10) << "check_cache rebuilt " << cached_snaps
- << " seq " << srnode.seq
- << " cached_seq " << cached_seq
- << " cached_last_created " << cached_last_created
- << " cached_last_destroyed " << cached_last_destroyed
- << ")" << dendl;
-}
-
-const set<snapid_t>& SnapRealm::get_snaps()
-{
- check_cache();
- dout(10) << "get_snaps " << cached_snaps
- << " (seq " << srnode.seq << " cached_seq " << cached_seq << ")"
- << dendl;
- return cached_snaps;
-}
-
-/*
- * build vector in reverse sorted order
- */
-const SnapContext& SnapRealm::get_snap_context()
-{
- check_cache();
-
- if (!cached_snap_context.seq) {
- cached_snap_context.seq = cached_seq;
- cached_snap_context.snaps.resize(cached_snaps.size());
- unsigned i = 0;
- for (set<snapid_t>::reverse_iterator p = cached_snaps.rbegin();
- p != cached_snaps.rend();
- p++)
- cached_snap_context.snaps[i++] = *p;
- }
-
- return cached_snap_context;
-}
-
-void SnapRealm::get_snap_info(map<snapid_t,SnapInfo*>& infomap, snapid_t first, snapid_t last)
-{
- const set<snapid_t>& snaps = get_snaps();
- dout(10) << "get_snap_info snaps " << snaps << dendl;
-
- // include my snaps within interval [first,last]
- for (map<snapid_t, SnapInfo>::iterator p = srnode.snaps.lower_bound(first); // first element >= first
- p != srnode.snaps.end() && p->first <= last;
- p++)
- infomap[p->first] = &p->second;
-
- // include snaps for parents during intervals that intersect [first,last]
- for (map<snapid_t, snaplink_t>::iterator p = srnode.past_parents.lower_bound(first);
- p != srnode.past_parents.end() && p->first >= first && p->second.first <= last;
- p++) {
- CInode *oldparent = mdcache->get_inode(p->second.ino);
- assert(oldparent); // call open_parents first!
- assert(oldparent->snaprealm);
- oldparent->snaprealm->get_snap_info(infomap,
- MAX(first, p->second.first),
- MIN(last, p->first));
- }
- if (srnode.current_parent_since <= last && parent)
- parent->get_snap_info(infomap, MAX(first, srnode.current_parent_since), last);
+ return out << "snap(" << sn.snapid
+ << " " << sn.ino
+ << " '" << sn.name
+ << "' " << sn.stamp << ")";
}
const string& SnapInfo::get_long_name()
@@ -272,226 +76,120 @@ const string& SnapInfo::get_long_name()
return long_name;
}
-const string& SnapRealm::get_snapname(snapid_t snapid, inodeno_t atino)
-{
- if (srnode.snaps.count(snapid)) {
- if (atino == inode->ino())
- return srnode.snaps[snapid].name;
- else
- return srnode.snaps[snapid].get_long_name();
- }
-
- map<snapid_t,snaplink_t>::iterator p = srnode.past_parents.lower_bound(snapid);
- if (p != srnode.past_parents.end() && p->second.first <= snapid) {
- CInode *oldparent = mdcache->get_inode(p->second.ino);
- assert(oldparent); // call open_parents first!
- assert(oldparent->snaprealm);
- return oldparent->snaprealm->get_snapname(snapid, atino);
- }
-
- assert(srnode.current_parent_since <= snapid);
- assert(parent);
- return parent->get_snapname(snapid, atino);
-}
+/*
+ * snaplink_t
+ */
-snapid_t SnapRealm::resolve_snapname(const string& n, inodeno_t atino, snapid_t first, snapid_t last)
+void snaplink_t::encode(bufferlist& bl) const
{
- // first try me
- dout(10) << "resolve_snapname '" << n << "' in [" << first << "," << last << "]" << dendl;
-
- //snapid_t num;
- //if (n[0] == '~') num = atoll(n.c_str()+1);
-
- bool actual = (atino == inode->ino());
- string pname;
- inodeno_t pino;
- if (!actual) {
- if (!n.length() ||
- n[0] != '_') return 0;
- int next_ = n.find('_', 1);
- if (next_ < 0) return 0;
- pname = n.substr(1, next_ - 1);
- pino = atoll(n.c_str() + next_ + 1);
- dout(10) << " " << n << " parses to name '" << pname << "' dirino " << pino << dendl;
- }
-
- for (map<snapid_t, SnapInfo>::iterator p = srnode.snaps.lower_bound(first); // first element >= first
- p != srnode.snaps.end() && p->first <= last;
- p++) {
- dout(15) << " ? " << p->second << dendl;
- //if (num && p->second.snapid == num)
- //return p->first;
- if (actual && p->second.name == n)
- return p->first;
- if (!actual && p->second.name == pname && p->second.ino == pino)
- return p->first;
- }
-
- // include snaps for parents during intervals that intersect [first,last]
- for (map<snapid_t, snaplink_t>::iterator p = srnode.past_parents.lower_bound(first);
- p != srnode.past_parents.end() && p->first >= first && p->second.first <= last;
- p++) {
- CInode *oldparent = mdcache->get_inode(p->second.ino);
- assert(oldparent); // call open_parents first!
- assert(oldparent->snaprealm);
- snapid_t r = oldparent->snaprealm->resolve_snapname(n, atino,
- MAX(first, p->second.first),
- MIN(last, p->first));
- if (r)
- return r;
- }
- if (parent && srnode.current_parent_since <= last)
- return parent->resolve_snapname(n, atino, MAX(first, srnode.current_parent_since), last);
- return 0;
+ ENCODE_START(2, 2, bl);
+ ::encode(ino, bl);
+ ::encode(first, bl);
+ ENCODE_FINISH(bl);
}
-
-void SnapRealm::adjust_parent()
+void snaplink_t::decode(bufferlist::iterator& bl)
{
- SnapRealm *newparent = inode->get_parent_dn()->get_dir()->get_inode()->find_snaprealm();
- if (newparent != parent) {
- dout(10) << "adjust_parent " << parent << " -> " << newparent << dendl;
- if (parent)
- parent->open_children.erase(this);
- parent = newparent;
- if (parent)
- parent->open_children.insert(this);
-
- invalidate_cached_snaps();
- }
+ DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl);
+ ::decode(ino, bl);
+ ::decode(first, bl);
+ DECODE_FINISH(bl);
}
-void SnapRealm::split_at(SnapRealm *child)
+void snaplink_t::dump(Formatter *f) const
{
- dout(10) << "split_at " << *child
- << " on " << *child->inode << dendl;
-
- if (!child->inode->is_dir()) {
- // it's not a dir.
- if (child->inode->containing_realm) {
- // - no open children.
- // - only need to move this child's inode's caps.
- child->inode->move_to_realm(child);
- } else {
- // no caps, nothing to move/split.
- dout(20) << " split no-op, no caps to move on file " << *child->inode << dendl;
- assert(!child->inode->is_any_caps());
- }
- return;
- }
-
- // it's a dir.
-
- // split open_children
- dout(10) << " open_children are " << open_children << dendl;
- for (set<SnapRealm*>::iterator p = open_children.begin();
- p != open_children.end(); ) {
- SnapRealm *realm = *p;
- if (realm != child &&
- child->inode->is_projected_ancestor_of(realm->inode)) {
- dout(20) << " child gets child realm " << *realm << " on " << *realm->inode << dendl;
- realm->parent = child;
- child->open_children.insert(realm);
- open_children.erase(p++);
- } else {
- dout(20) << " keeping child realm " << *realm << " on " << *realm->inode << dendl;
- p++;
- }
- }
-
- // split inodes_with_caps
- elist<CInode*>::iterator p = inodes_with_caps.begin(member_offset(CInode, item_caps));
- while (!p.end()) {
- CInode *in = *p;
- ++p;
-
- // does inode fall within the child realm?
- bool under_child = false;
-
- if (in == child->inode) {
- under_child = true;
- } else {
- CInode *t = in;
- while (t->get_parent_dn()) {
- t = t->get_parent_dn()->get_dir()->get_inode();
- if (t == child->inode) {
- under_child = true;
- break;
- }
- if (t == in)
- break;
- }
- }
- if (under_child) {
- dout(20) << " child gets " << *in << dendl;
- in->move_to_realm(child);
- } else {
- dout(20) << " keeping " << *in << dendl;
- }
- }
-
+ f->dump_unsigned("ino", ino);
+ f->dump_unsigned("first", first);
}
-const bufferlist& SnapRealm::get_snap_trace()
+void snaplink_t::generate_test_instances(list<snaplink_t*>& ls)
{
- check_cache();
- return cached_snap_trace;
+ ls.push_back(new snaplink_t);
+ ls.push_back(new snaplink_t);
+ ls.back()->ino = 2;
+ ls.back()->first = 123;
}
-void SnapRealm::build_snap_trace(bufferlist& snapbl)
+ostream& operator<<(ostream& out, const snaplink_t &l)
{
- SnapRealmInfo info(inode->ino(), srnode.created, srnode.seq, srnode.current_parent_since);
-
- if (parent) {
- info.h.parent = parent->inode->ino();
- if (!srnode.past_parents.empty()) {
- snapid_t last = srnode.past_parents.rbegin()->first;
- set<snapid_t> past;
- snapid_t max_seq, max_last_created, max_last_destroyed;
- build_snap_set(past, max_seq, max_last_created, max_last_destroyed, 0, last);
- info.prior_parent_snaps.reserve(past.size());
- for (set<snapid_t>::reverse_iterator p = past.rbegin(); p != past.rend(); p++)
- info.prior_parent_snaps.push_back(*p);
- dout(10) << "build_snap_trace prior_parent_snaps from [1," << last << "] "
- << info.prior_parent_snaps << dendl;
- }
- } else
- info.h.parent = 0;
-
- info.my_snaps.reserve(srnode.snaps.size());
- for (map<snapid_t,SnapInfo>::reverse_iterator p = srnode.snaps.rbegin();
- p != srnode.snaps.rend();
- p++)
- info.my_snaps.push_back(p->first);
- dout(10) << "build_snap_trace my_snaps " << info.my_snaps << dendl;
-
- ::encode(info, snapbl);
-
- if (parent)
- parent->build_snap_trace(snapbl);
+ return out << l.ino << "@" << l.first;
}
+/*
+ * sr_t
+ */
-
-void SnapRealm::prune_past_parents()
-{
- dout(10) << "prune_past_parents" << dendl;
- check_cache();
- assert(open);
-
- map<snapid_t, snaplink_t>::iterator p = srnode.past_parents.begin();
- while (p != srnode.past_parents.end()) {
- set<snapid_t>::iterator q = cached_snaps.lower_bound(p->second.first);
- if (q == cached_snaps.end() ||
- *q > p->first) {
- dout(10) << "prune_past_parents pruning [" << p->second.first << "," << p->first
- << "] " << p->second.ino << dendl;
- srnode.past_parents.erase(p++);
- } else {
- dout(10) << "prune_past_parents keeping [" << p->second.first << "," << p->first
- << "] " << p->second.ino << dendl;
- p++;
- }
- }
+/*
+ * Append the encoded form of this sr_t to bl.
+ *
+ * The fields are written between ENCODE_START(4, 4, ...) and ENCODE_FINISH
+ * in a fixed order; sr_t::decode() reads them back in the same order, so
+ * the two must be changed together.
+ */
+void sr_t::encode(bufferlist& bl) const
+{
+ ENCODE_START(4, 4, bl);
+ ::encode(seq, bl);
+ ::encode(created, bl);
+ ::encode(last_created, bl);
+ ::encode(last_destroyed, bl);
+ ::encode(current_parent_since, bl);
+ ::encode(snaps, bl);
+ ::encode(past_parents, bl);
+ ENCODE_FINISH(bl);
+}
+
+/*
+ * Decode an sr_t from p.
+ *
+ * Fields are read in the same order sr_t::encode() writes them.  When the
+ * decoded struct_v is 2, one extra byte follows what
+ * DECODE_START_LEGACY_COMPAT_LEN consumed; it is read and discarded before
+ * the fields.
+ */
+void sr_t::decode(bufferlist::iterator& p)
+{
+  DECODE_START_LEGACY_COMPAT_LEN(4, 4, 4, p);
+  if (struct_v == 2) {
+    // Use a distinctly named throwaway so the struct_v provided by
+    // DECODE_START_LEGACY_COMPAT_LEN is not shadowed inside this scope.
+    __u8 extra_v2_byte;
+    ::decode(extra_v2_byte, p);  // yes, really: extra byte for v2 encoding only, see 6ee52e7d.
+  }
+  ::decode(seq, p);
+  ::decode(created, p);
+  ::decode(last_created, p);
+  ::decode(last_destroyed, p);
+  ::decode(current_parent_since, p);
+  ::decode(snaps, p);
+  ::decode(past_parents, p);
+  DECODE_FINISH(p);
+}
+
+/*
+ * Dump this sr_t through the given Formatter.
+ *
+ * Emits the five scalar fields as unsigned values, then two array
+ * sections, "snaps" and "past_parents".  Each array element is an object
+ * that holds the map key under "last", followed by whatever the mapped
+ * value's own dump() emits.
+ */
+void sr_t::dump(Formatter *f) const
+{
+ f->dump_unsigned("seq", seq);
+ f->dump_unsigned("created", created);
+ f->dump_unsigned("last_created", last_created);
+ f->dump_unsigned("last_destroyed", last_destroyed);
+ f->dump_unsigned("current_parent_since", current_parent_since);
+
+ // one "snapinfo" object per entry of the snaps map
+ f->open_array_section("snaps");
+ for (map<snapid_t,SnapInfo>::const_iterator p = snaps.begin(); p != snaps.end(); ++p) {
+ f->open_object_section("snapinfo");
+ f->dump_unsigned("last", p->first);
+ p->second.dump(f);
+ f->close_section();
+ }
+ f->close_section();
+
+ // one "past_parent" object per entry of the past_parents map
+ f->open_array_section("past_parents");
+ for (map<snapid_t,snaplink_t>::const_iterator p = past_parents.begin(); p != past_parents.end(); ++p) {
+ f->open_object_section("past_parent");
+ f->dump_unsigned("last", p->first);
+ p->second.dump(f);
+ f->close_section();
+ }
+ f->close_section();
+}
+
+/*
+ * Append two heap-allocated sample instances to ls: a default-constructed
+ * sr_t, then one with every scalar field set and a single entry in each
+ * of the snaps and past_parents maps.
+ */
+void sr_t::generate_test_instances(list<sr_t*>& ls)
+{
+  // first sample: all defaults
+  ls.push_back(new sr_t);
+
+  // second sample: populated
+  sr_t *populated = new sr_t;
+  ls.push_back(populated);
+
+  populated->seq = 1;
+  populated->created = 2;
+  populated->last_created = 3;
+  populated->last_destroyed = 4;
+  populated->current_parent_since = 5;
+
+  SnapInfo& snap = populated->snaps[123];
+  snap.snapid = 7;
+  snap.ino = 8;
+  snap.stamp = utime_t(9, 10);
+  snap.name = "name1";
+
+  snaplink_t& link = populated->past_parents[12];
+  link.ino = 12;
+  link.first = 3;
}
diff --git a/src/mds/snap.h b/src/mds/snap.h
index e583820dce9..068b6f17073 100644
--- a/src/mds/snap.h
+++ b/src/mds/snap.h
@@ -16,8 +16,6 @@
#define CEPH_MDS_SNAP_H
#include "mdstypes.h"
-#include "include/xlist.h"
-#include "include/elist.h"
#include "common/snap_types.h"
/*
@@ -27,35 +25,20 @@ struct SnapInfo {
snapid_t snapid;
inodeno_t ino;
utime_t stamp;
- string name, long_name;
+ string name;
+
+ string long_name; ///< cached _$ino_$name
- void encode(bufferlist& bl) const {
- __u8 struct_v = 1;
- ::encode(struct_v, bl);
- ::encode(snapid, bl);
- ::encode(ino, bl);
- ::encode(stamp, bl);
- ::encode(name, bl);
- }
- void decode(bufferlist::iterator& bl) {
- __u8 struct_v;
- ::decode(struct_v, bl);
- ::decode(snapid, bl);
- ::decode(ino, bl);
- ::decode(stamp, bl);
- ::decode(name, bl);
- }
+ void encode(bufferlist &bl) const;
+ void decode(bufferlist::iterator &bl);
+ void dump(Formatter *f) const;
+ static void generate_test_instances(list<SnapInfo*>& ls);
+
const string& get_long_name();
};
WRITE_CLASS_ENCODER(SnapInfo)
-inline ostream& operator<<(ostream& out, const SnapInfo &sn) {
- return out << "snap(" << sn.snapid
- << " " << sn.ino
- << " '" << sn.name
- << "' " << sn.stamp << ")";
-}
-
+ostream& operator<<(ostream& out, const SnapInfo &sn);
/*
@@ -74,25 +57,16 @@ class MDRequest;
struct snaplink_t {
inodeno_t ino;
snapid_t first;
- void encode(bufferlist& bl) const {
- __u8 struct_v = 1;
- ::encode(struct_v, bl);
- ::encode(ino, bl);
- ::encode(first, bl);
- }
- void decode(bufferlist::iterator& bl) {
- __u8 struct_v;
- ::decode(struct_v, bl);
- ::decode(ino, bl);
- ::decode(first, bl);
- }
+
+ void encode(bufferlist &bl) const;
+ void decode(bufferlist::iterator &bl);
+ void dump(Formatter *f) const;
+ static void generate_test_instances(list<snaplink_t*>& ls);
};
WRITE_CLASS_ENCODER(snaplink_t)
-inline ostream& operator<<(ostream& out, const snaplink_t &l)
-{
- return out << l.ino << "@" << l.first;
-}
+ostream& operator<<(ostream& out, const snaplink_t &l);
+
// carry data about a specific version of a SnapRealm
struct sr_t {
@@ -104,166 +78,17 @@ struct sr_t {
map<snapid_t, SnapInfo> snaps;
map<snapid_t, snaplink_t> past_parents; // key is "last" (or NOSNAP)
- sr_t() :
- seq(0), created(0),
- last_created(0), last_destroyed(0),
- current_parent_since(1)
+ sr_t()
+ : seq(0), created(0),
+ last_created(0), last_destroyed(0),
+ current_parent_since(1)
{}
-
- void encode(bufferlist& bl) const {
- __u8 struct_v = 3;
- ::encode(struct_v, bl);
- ::encode(seq, bl);
- ::encode(created, bl);
- ::encode(last_created, bl);
- ::encode(last_destroyed, bl);
- ::encode(current_parent_since, bl);
- ::encode(snaps, bl);
- ::encode(past_parents, bl);
- }
- void decode(bufferlist::iterator& p) {
- __u8 struct_v;
- ::decode(struct_v, p);
- if (struct_v == 2)
- ::decode(struct_v, p); // yes, really: extra byte for v2 encoding only, see 6ee52e7d.
- ::decode(seq, p);
- ::decode(created, p);
- ::decode(last_created, p);
- ::decode(last_destroyed, p);
- ::decode(current_parent_since, p);
- ::decode(snaps, p);
- ::decode(past_parents, p);
- }
+ void encode(bufferlist &bl) const;
+ void decode(bufferlist::iterator &bl);
+ void dump(Formatter *f) const;
+ static void generate_test_instances(list<sr_t*>& ls);
};
WRITE_CLASS_ENCODER(sr_t);
-struct SnapRealm {
- // realm state
-
- sr_t srnode;
-
- // in-memory state
- MDCache *mdcache;
- CInode *inode;
-
- bool open; // set to true once all past_parents are opened
- SnapRealm *parent;
- set<SnapRealm*> open_children; // active children that are currently open
- map<inodeno_t,SnapRealm*> open_past_parents; // these are explicitly pinned.
-
- // cache
- snapid_t cached_seq; // max seq over self and all past+present parents.
- snapid_t cached_last_created; // max last_created over all past+present parents
- snapid_t cached_last_destroyed;
- set<snapid_t> cached_snaps;
- SnapContext cached_snap_context;
-
- bufferlist cached_snap_trace;
-
- elist<CInode*> inodes_with_caps; // for efficient realm splits
- map<client_t, xlist<Capability*>* > client_caps; // to identify clients who need snap notifications
-
- SnapRealm(MDCache *c, CInode *in) :
- srnode(),
- mdcache(c), inode(in),
- open(false), parent(0),
- inodes_with_caps(0)
- { }
-
- bool exists(const string &name) {
- for (map<snapid_t,SnapInfo>::iterator p = srnode.snaps.begin();
- p != srnode.snaps.end();
- p++)
- if (p->second.name == name)
- return true;
- return false;
- }
-
- bool _open_parents(Context *retryorfinish, snapid_t first=1, snapid_t last=CEPH_NOSNAP);
- bool open_parents(Context *retryorfinish) {
- if (!_open_parents(retryorfinish))
- return false;
- delete retryorfinish;
- return true;
- }
- bool have_past_parents_open(snapid_t first=1, snapid_t last=CEPH_NOSNAP);
- void add_open_past_parent(SnapRealm *parent);
- void close_parents();
-
- void prune_past_parents();
- bool has_past_parents() { return !srnode.past_parents.empty(); }
-
- void build_snap_set(set<snapid_t>& s,
- snapid_t& max_seq, snapid_t& max_last_created, snapid_t& max_last_destroyed,
- snapid_t first, snapid_t last);
- void get_snap_info(map<snapid_t,SnapInfo*>& infomap, snapid_t first=0, snapid_t last=CEPH_NOSNAP);
-
- const bufferlist& get_snap_trace();
- void build_snap_trace(bufferlist& snapbl);
-
- const string& get_snapname(snapid_t snapid, inodeno_t atino);
- snapid_t resolve_snapname(const string &name, inodeno_t atino, snapid_t first=0, snapid_t last=CEPH_NOSNAP);
-
- void check_cache();
- const set<snapid_t>& get_snaps();
- const SnapContext& get_snap_context();
- void invalidate_cached_snaps() {
- cached_seq = 0;
- }
- snapid_t get_last_created() {
- check_cache();
- return cached_last_created;
- }
- snapid_t get_last_destroyed() {
- check_cache();
- return cached_last_destroyed;
- }
- snapid_t get_newest_snap() {
- check_cache();
- if (cached_snaps.empty())
- return 0;
- else
- return *cached_snaps.rbegin();
- }
- snapid_t get_newest_seq() {
- check_cache();
- return cached_seq;
- }
-
- snapid_t get_snap_following(snapid_t follows) {
- check_cache();
- set<snapid_t> s = get_snaps();
- set<snapid_t>::iterator p = s.upper_bound(follows);
- if (p != s.end())
- return *p;
- return CEPH_NOSNAP;
- }
-
- void adjust_parent();
-
- void split_at(SnapRealm *child);
- void join(SnapRealm *child);
-
- void add_cap(client_t client, Capability *cap) {
- if (client_caps.count(client) == 0)
- client_caps[client] = new xlist<Capability*>;
- client_caps[client]->push_back(&cap->item_snaprealm_caps);
- }
- void remove_cap(client_t client, Capability *cap) {
- cap->item_snaprealm_caps.remove_myself();
- if (client_caps[client]->empty()) {
- delete client_caps[client];
- client_caps.erase(client);
- }
- }
-
-};
-
-ostream& operator<<(ostream& out, const SnapRealm &realm);
-
-
-
-
-
#endif
diff --git a/src/messages/MClientReconnect.h b/src/messages/MClientReconnect.h
index 400159c6db6..f7d6ac9897d 100644
--- a/src/messages/MClientReconnect.h
+++ b/src/messages/MClientReconnect.h
@@ -22,7 +22,7 @@
class MClientReconnect : public Message {
- const static int HEAD_VERSION = 2;
+ const static int HEAD_VERSION = 3;
public:
map<inodeno_t, cap_reconnect_t> caps; // only head inodes
@@ -53,9 +53,17 @@ public:
}
void encode_payload(uint64_t features) {
- if (features & CEPH_FEATURE_FLOCK) {
- // new protocol
+ if (features & CEPH_FEATURE_MDSENC) {
::encode(caps, data);
+ } else if (features & CEPH_FEATURE_FLOCK) {
+ // encode with old cap_reconnect_t encoding
+ __u32 n = caps.size();
+ ::encode(n, data);
+ for (map<inodeno_t,cap_reconnect_t>::iterator p = caps.begin(); p != caps.end(); ++p) {
+ ::encode(p->first, data);
+ p->second.encode_old(data);
+ }
+ header.version = 2;
} else {
// compat crap
header.version = 1;
@@ -68,9 +76,17 @@ public:
}
void decode_payload() {
bufferlist::iterator p = data.begin();
- if (header.version >= 2) {
+ if (header.version >= 3) {
// new protocol
::decode(caps, p);
+ } else if (header.version == 2) {
+ __u32 n;
+ ::decode(n, p);
+ inodeno_t ino;
+ while (n--) {
+ ::decode(ino, p);
+ caps[ino].decode_old(p);
+ }
} else {
// compat crap
map<inodeno_t, old_cap_reconnect_t> ocaps;
diff --git a/src/messages/MMDSMap.h b/src/messages/MMDSMap.h
index b5558493746..42bb98f54e1 100644
--- a/src/messages/MMDSMap.h
+++ b/src/messages/MMDSMap.h
@@ -61,7 +61,7 @@ class MMDSMap : public Message {
Message(CEPH_MSG_MDS_MAP),
fsid(f) {
epoch = mm->get_epoch();
- mm->encode(encoded);
+ mm->encode(encoded, -1); // we will reencode with fewer features as necessary
}
private:
~MMDSMap() {}
@@ -87,7 +87,7 @@ public:
MDSMap m;
m.decode(encoded);
encoded.clear();
- m.encode_client_old(encoded);
+ m.encode(encoded, features);
}
::encode(encoded, payload);
}
diff --git a/src/messages/MOSDRepScrub.h b/src/messages/MOSDRepScrub.h
index 2d3a66d96af..4fae008c17e 100644
--- a/src/messages/MOSDRepScrub.h
+++ b/src/messages/MOSDRepScrub.h
@@ -36,7 +36,10 @@ struct MOSDRepScrub : public Message {
hobject_t end; // upper bound of scrub, exclusive
bool deep; // true if scrub should be deep
- MOSDRepScrub() : Message(MSG_OSD_REP_SCRUB, HEAD_VERSION, COMPAT_VERSION) { }
+ MOSDRepScrub() : Message(MSG_OSD_REP_SCRUB, HEAD_VERSION, COMPAT_VERSION),
+ chunky(false),
+ deep(false) { }
+
MOSDRepScrub(pg_t pgid, eversion_t scrub_from, eversion_t scrub_to,
epoch_t map_epoch)
: Message(MSG_OSD_REP_SCRUB, HEAD_VERSION, COMPAT_VERSION),
diff --git a/src/mon/AuthMonitor.cc b/src/mon/AuthMonitor.cc
index e4cd752f29b..ac54064d568 100644
--- a/src/mon/AuthMonitor.cc
+++ b/src/mon/AuthMonitor.cc
@@ -207,7 +207,7 @@ void AuthMonitor::increase_max_global_id()
bool AuthMonitor::should_propose(double& delay)
{
- return (pending_auth.size() > 0);
+ return (!pending_auth.empty());
}
void AuthMonitor::create_pending()
diff --git a/src/mon/MDSMonitor.cc b/src/mon/MDSMonitor.cc
index cd2dc8fa517..3cab479ee12 100644
--- a/src/mon/MDSMonitor.cc
+++ b/src/mon/MDSMonitor.cc
@@ -126,7 +126,7 @@ void MDSMonitor::encode_pending(bufferlist &bl)
// apply to paxos
assert(paxos->get_version() + 1 == pending_mdsmap.epoch);
- pending_mdsmap.encode(bl);
+ pending_mdsmap.encode(bl, mon->get_quorum_features());
}
void MDSMonitor::update_logger()
@@ -354,6 +354,13 @@ bool MDSMonitor::prepare_beacon(MMDSBeacon *m)
// boot?
if (state == MDSMap::STATE_BOOT) {
+ // zap previous instance of this name?
+ if (g_conf->mds_enforce_unique_name) {
+ while (uint64_t existing = pending_mdsmap.find_mds_gid_by_name(m->get_name())) {
+ fail_mds_gid(existing);
+ }
+ }
+
// add
MDSMap::mds_info_t& info = pending_mdsmap.mds_info[gid];
info.global_id = gid;
@@ -376,7 +383,6 @@ bool MDSMonitor::prepare_beacon(MMDSBeacon *m)
}
}
-
// initialize the beacon timer
last_beacon[gid].stamp = ceph_clock_now(g_ceph_context);
last_beacon[gid].seq = seq;
@@ -603,11 +609,11 @@ bool MDSMonitor::preprocess_command(MMonCommand *m)
} else {
MDSMap mm;
mm.decode(b);
- mm.encode(rdata);
+ mm.encode(rdata, m->get_connection()->get_features());
ss << "got mdsmap epoch " << mm.get_epoch();
}
} else {
- mdsmap.encode(rdata);
+ mdsmap.encode(rdata, m->get_connection()->get_features());
ss << "got mdsmap epoch " << mdsmap.get_epoch();
}
r = 0;
@@ -672,10 +678,30 @@ bool MDSMonitor::preprocess_command(MMonCommand *m)
return false;
}
+// Remove the MDS daemon identified by gid from pending_mdsmap.
+//
+// gid must be present in pending_mdsmap.mds_info (asserted). The daemon's
+// address is passed to the OSD monitor's blacklist() with an expiry of
+// now + mds_blacklist_interval; the value blacklist() returns is stored in
+// pending_mdsmap.last_failure_osd_epoch. If the daemon holds a rank
+// (rank >= 0) that rank is moved from 'up' to 'failed'.
+void MDSMonitor::fail_mds_gid(uint64_t gid)
+{
+ assert(pending_mdsmap.mds_info.count(gid));
+ MDSMap::mds_info_t& info = pending_mdsmap.mds_info[gid];
+ dout(10) << "fail_mds_gid " << gid << " mds." << info.name << " rank " << info.rank << dendl;
+
+ // blacklist expiry: current time plus the configured interval
+ utime_t until = ceph_clock_now(g_ceph_context);
+ until += g_conf->mds_blacklist_interval;
+
+ pending_mdsmap.last_failure_osd_epoch = mon->osdmon()->blacklist(info.addr, until);
+ mon->osdmon()->propose_pending();
+
+ if (info.rank >= 0) {
+ pending_mdsmap.up.erase(info.rank);
+ pending_mdsmap.failed.insert(info.rank);
+ }
+
+ // erase last: 'info' is a reference into mds_info[gid] and is read above
+ pending_mdsmap.mds_info.erase(gid);
+}
+
int MDSMonitor::fail_mds(std::ostream &ss, const std::string &arg)
{
std::string err;
- int w = strict_strtol(arg.c_str(), 10, &err);
+ int w = strict_strtoll(arg.c_str(), 10, &err);
if (!err.empty()) {
// Try to interpret the arg as an MDS name
const MDSMap::mds_info_t *mds_info = mdsmap.find_by_name(arg);
@@ -688,18 +714,12 @@ int MDSMonitor::fail_mds(std::ostream &ss, const std::string &arg)
if (pending_mdsmap.up.count(w)) {
uint64_t gid = pending_mdsmap.up[w];
- if (pending_mdsmap.mds_info.count(gid)) {
- utime_t until = ceph_clock_now(g_ceph_context);
- until += g_conf->mds_blacklist_interval;
- MDSMap::mds_info_t& info = pending_mdsmap.mds_info[pending_mdsmap.up[w]];
- pending_mdsmap.last_failure_osd_epoch = mon->osdmon()->blacklist(info.addr, until);
- mon->osdmon()->propose_pending();
-
- pending_mdsmap.mds_info.erase(gid);
- }
- pending_mdsmap.up.erase(w);
- pending_mdsmap.failed.insert(w);
+ if (pending_mdsmap.mds_info.count(gid))
+ fail_mds_gid(gid);
ss << "failed mds." << w;
+ } else if (pending_mdsmap.mds_info.count(w)) {
+ fail_mds_gid(w);
+ ss << "failed mds gid " << w;
}
return 0;
}
diff --git a/src/mon/MDSMonitor.h b/src/mon/MDSMonitor.h
index d852785fa90..53210d99d33 100644
--- a/src/mon/MDSMonitor.h
+++ b/src/mon/MDSMonitor.h
@@ -54,13 +54,10 @@ class MDSMonitor : public PaxosService {
C_Updated(MDSMonitor *a, MMDSBeacon *c) :
mm(a), m(c) {}
void finish(int r) {
- if (r == -ECANCELED) {
- if (m)
- m->put();
- return;
- }
if (r >= 0)
mm->_updated(m); // success
+ else if (r == -ECANCELED)
+ m->put();
else
mm->dispatch((PaxosServiceMessage*)m); // try again
}
@@ -96,6 +93,8 @@ class MDSMonitor : public PaxosService {
void get_health(list<pair<health_status_t,string> >& summary,
list<pair<health_status_t,string> > *detail) const;
int fail_mds(std::ostream &ss, const std::string &arg);
+ void fail_mds_gid(uint64_t gid);
+
int cluster_fail(std::ostream &ss);
bool preprocess_command(MMonCommand *m);
diff --git a/src/mon/MonCaps.cc b/src/mon/MonCaps.cc
index 3c5be9278a3..dc3d9bdd0de 100644
--- a/src/mon/MonCaps.cc
+++ b/src/mon/MonCaps.cc
@@ -213,8 +213,7 @@ do { \
if (token.compare(";") == 0 || pos >= s.size()) {
if (got_eq) {
- ASSERT_STATE((services_list.size() > 0) ||
- (uid_list.size() > 0));
+ ASSERT_STATE(!services_list.empty() || !uid_list.empty());
for (list<int>::iterator i = services_list.begin(); i != services_list.end(); ++i) {
MonCap& cap = services_map[*i];
diff --git a/src/mon/Monitor.cc b/src/mon/Monitor.cc
index 699db8968f1..1b8bc9ebeb7 100644
--- a/src/mon/Monitor.cc
+++ b/src/mon/Monitor.cc
@@ -484,7 +484,7 @@ int Monitor::preinit()
list<string> initial_members;
get_str_list(g_conf->mon_initial_members, initial_members);
- if (initial_members.size()) {
+ if (!initial_members.empty()) {
dout(1) << " initial_members " << initial_members << ", filtering seed monmap" << dendl;
monmap->set_initial_members(g_ceph_context, initial_members, name, messenger->get_myaddr(),
@@ -1358,7 +1358,7 @@ void Monitor::get_health(string& status, bufferlist *detailbl, Formatter *f)
<< (timecheck_round%2 ? "on-going" : "finished");
}
- if (timecheck_skews.size() != 0) {
+ if (!timecheck_skews.empty()) {
list<string> warns;
if (f)
f->open_array_section("mons");
@@ -2299,7 +2299,7 @@ void Monitor::timecheck_finish_round(bool success)
timecheck_round_start = utime_t();
if (success) {
- assert(timecheck_waiting.size() == 0);
+ assert(timecheck_waiting.empty());
assert(timecheck_acks == quorum.size());
timecheck_report();
return;
@@ -2544,7 +2544,7 @@ void Monitor::handle_timecheck_leader(MTimeCheck *m)
dout(10) << __func__ << " got pongs from everybody ("
<< timecheck_acks << " total)" << dendl;
assert(timecheck_skews.size() == timecheck_acks);
- assert(timecheck_waiting.size() == 0);
+ assert(timecheck_waiting.empty());
// everyone has acked, so bump the round to finish it.
timecheck_finish_round();
}
diff --git a/src/mon/Monitor.h b/src/mon/Monitor.h
index c7704bb16da..e8baf8d864c 100644
--- a/src/mon/Monitor.h
+++ b/src/mon/Monitor.h
@@ -460,10 +460,12 @@ public:
void finish(int r) {
if (r >= 0)
mon->reply_command(m, rc, rs, rdata, version);
- else if (r == -ECANCELED) {
+ else if (r == -ECANCELED)
m->put();
- } else
+ else if (r == -EAGAIN)
mon->_ms_dispatch(m);
+ else
+ assert(0 == "bad C_Command return value");
}
};
@@ -474,10 +476,12 @@ public:
public:
C_RetryMessage(Monitor *m, Message *ms) : mon(m), msg(ms) {}
void finish(int r) {
- if (r == -ECANCELED) {
- msg->put();
- } else
+ if (r == -EAGAIN || r >= 0)
mon->_ms_dispatch(msg);
+ else if (r == -ECANCELED)
+ msg->put();
+ else
+ assert(0 == "bad C_RetryMessage return value");
}
};
diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc
index 1355938c582..9c094307e4c 100644
--- a/src/mon/OSDMonitor.cc
+++ b/src/mon/OSDMonitor.cc
@@ -248,7 +248,7 @@ bool OSDMonitor::thrash()
if (std::find(v.begin(), v.end(), *q) == v.end())
v.push_back(*q);
}
- if (v.size())
+ if (!v.empty())
pending_inc.new_pg_temp[p->first] = v;
dout(5) << "thrash_map pg " << p->first << " pg_temp remapped to " << v << dendl;
@@ -1880,6 +1880,38 @@ bool OSDMonitor::preprocess_command(MMonCommand *m)
}
}
}
+ else if (m->cmd[1] == "find") {
+ if (m->cmd.size() < 3) {
+ ss << "usage: osd find <osd-id>";
+ r = -EINVAL;
+ goto out;
+ }
+ long osd = parse_osd_id(m->cmd[2].c_str(), &ss);
+ if (osd < 0) {
+ r = -EINVAL;
+ goto out;
+ }
+ if (!osdmap.exists(osd)) {
+ ss << "osd." << osd << " does not exist";
+ r = -ENOENT;
+ goto out;
+ }
+ JSONFormatter jf(true);
+ jf.open_object_section("osd_location");
+ jf.dump_int("osd", osd);
+ jf.dump_stream("ip") << osdmap.get_addr(osd);
+ jf.open_object_section("crush_location");
+ map<string,string> loc = osdmap.crush->get_full_location(osd);
+ for (map<string,string>::iterator p = loc.begin(); p != loc.end(); ++p)
+ jf.dump_string(p->first.c_str(), p->second);
+ jf.close_section();
+ jf.close_section();
+ ostringstream rs;
+ jf.flush(rs);
+ rs << "\n";
+ rdata.append(rs.str());
+ r = 0;
+ }
else if (m->cmd[1] == "map" && m->cmd.size() == 4) {
int64_t pool = osdmap.lookup_pg_pool_name(m->cmd[2].c_str());
if (pool < 0) {
@@ -1964,6 +1996,40 @@ bool OSDMonitor::preprocess_command(MMonCommand *m)
ss << "listed " << osdmap.blacklist.size() << " entries";
r = 0;
}
+ else if (m->cmd.size() >= 4 && m->cmd[1] == "crush" && m->cmd[2] == "rule" && (m->cmd[3] == "list" ||
+ m->cmd[3] == "ls")) {
+ JSONFormatter jf(true);
+ jf.open_array_section("rules");
+ osdmap.crush->list_rules(&jf);
+ jf.close_section();
+ ostringstream rs;
+ jf.flush(rs);
+ rs << "\n";
+ rdata.append(rs.str());
+ r = 0;
+ }
+ else if (m->cmd.size() >= 4 && m->cmd[1] == "crush" && m->cmd[2] == "rule" && m->cmd[3] == "dump") {
+ JSONFormatter jf(true);
+ jf.open_array_section("rules");
+ osdmap.crush->dump_rules(&jf);
+ jf.close_section();
+ ostringstream rs;
+ jf.flush(rs);
+ rs << "\n";
+ rdata.append(rs.str());
+ r = 0;
+ }
+ else if (m->cmd.size() == 3 && m->cmd[1] == "crush" && m->cmd[2] == "dump") {
+ JSONFormatter jf(true);
+ jf.open_object_section("crush_map");
+ osdmap.crush->dump(&jf);
+ jf.close_section();
+ ostringstream rs;
+ jf.flush(rs);
+ rs << "\n";
+ rdata.append(rs.str());
+ r = 0;
+ }
}
out:
if (r != -1) {
@@ -2380,6 +2446,94 @@ bool OSDMonitor::prepare_command(MMonCommand *m)
return true;
}
}
+ else if (m->cmd.size() == 7 &&
+ m->cmd[1] == "crush" &&
+ m->cmd[2] == "rule" &&
+ m->cmd[3] == "create-simple") {
+ string name = m->cmd[4];
+ string root = m->cmd[5];
+ string type = m->cmd[6];
+
+ if (osdmap.crush->rule_exists(name)) {
+ ss << "rule " << name << " already exists";
+ err = 0;
+ goto out;
+ }
+
+ bufferlist bl;
+ if (pending_inc.crush.length())
+ bl = pending_inc.crush;
+ else
+ osdmap.crush->encode(bl);
+ CrushWrapper newcrush;
+ bufferlist::iterator p = bl.begin();
+ newcrush.decode(p);
+
+ if (newcrush.rule_exists(name)) {
+ ss << "rule " << name << " already exists";
+ } else {
+ int rule = newcrush.add_simple_rule(name, root, type);
+ if (rule < 0) {
+ err = rule;
+ goto out;
+ }
+
+ pending_inc.crush.clear();
+ newcrush.encode(pending_inc.crush);
+ }
+ getline(ss, rs);
+ paxos->wait_for_commit(new Monitor::C_Command(mon, m, 0, rs, paxos->get_version()));
+ return true;
+ }
+ else if (m->cmd.size() == 5 &&
+ m->cmd[1] == "crush" &&
+ m->cmd[2] == "rule" &&
+ m->cmd[3] == "rm") {
+ string name = m->cmd[4];
+
+ if (!osdmap.crush->rule_exists(name)) {
+ ss << "rule " << name << " does not exist";
+ err = 0;
+ goto out;
+ }
+
+ bufferlist bl;
+ if (pending_inc.crush.length())
+ bl = pending_inc.crush;
+ else
+ osdmap.crush->encode(bl);
+ CrushWrapper newcrush;
+ bufferlist::iterator p = bl.begin();
+ newcrush.decode(p);
+
+ if (!newcrush.rule_exists(name)) {
+ ss << "rule " << name << " does not exist";
+ } else {
+ int ruleno = newcrush.get_rule_id(name);
+ assert(ruleno >= 0);
+
+ // make sure it is not in use.
+ // FIXME: this is ok in some situations, but let's not bother with that
+ // complexity now.
+ int ruleset = newcrush.get_rule_mask_ruleset(ruleno);
+ if (osdmap.crush_ruleset_in_use(ruleset)) {
+ ss << "crush rule " << name << " ruleset " << ruleset << " is in use";
+ err = -EBUSY;
+ goto out;
+ }
+
+ err = newcrush.remove_rule(ruleno);
+ if (err < 0) {
+ goto out;
+ }
+
+ pending_inc.crush.clear();
+ newcrush.encode(pending_inc.crush);
+ }
+ getline(ss, rs);
+ paxos->wait_for_commit(new Monitor::C_Command(mon, m, 0, rs, paxos->get_version()));
+ return true;
+ }
else if (m->cmd[1] == "setmaxosd" && m->cmd.size() > 2) {
int newmax = parse_pos_long(m->cmd[2].c_str(), &ss);
if (newmax < 0) {
diff --git a/src/mon/OSDMonitor.h b/src/mon/OSDMonitor.h
index f53b6285abb..05c484ed652 100644
--- a/src/mon/OSDMonitor.h
+++ b/src/mon/OSDMonitor.h
@@ -56,7 +56,7 @@ struct failure_info_t {
failure_info_t() : num_reports(0) {}
utime_t get_failed_since() {
- if (max_failed_since == utime_t() && reporters.size()) {
+ if (max_failed_since == utime_t() && !reporters.empty()) {
// the old max must have canceled; recalculate.
for (map<int, failure_reporter_t>::iterator p = reporters.begin();
p != reporters.end();
@@ -213,8 +213,10 @@ private:
cmon->_booted(m, logit);
else if (r == -ECANCELED)
m->put();
- else
+ else if (r == -EAGAIN)
cmon->dispatch((PaxosServiceMessage*)m);
+ else
+ assert(0 == "bad C_Booted return value");
}
};
@@ -224,13 +226,14 @@ private:
epoch_t e;
C_ReplyMap(OSDMonitor *o, PaxosServiceMessage *mm, epoch_t ee) : osdmon(o), m(mm), e(ee) {}
void finish(int r) {
- if (r >= 0) {
+ if (r >= 0)
osdmon->_reply_map(m, e);
- } else if (r == -ECANCELED) {
+ else if (r == -ECANCELED)
m->put();
- } else {
+ else if (r == -EAGAIN)
osdmon->dispatch(m);
- }
+ else
+ assert(0 == "bad C_ReplyMap return value");
}
};
struct C_PoolOp : public Context {
@@ -245,13 +248,14 @@ private:
reply_data = *rd;
}
void finish(int r) {
- if (r >= 0) {
+ if (r >= 0)
osdmon->_pool_op_reply(m, replyCode, epoch, &reply_data);
- } else if (r == -ECANCELED) {
+ else if (r == -ECANCELED)
m->put();
- } else {
+ else if (r == -EAGAIN)
osdmon->dispatch(m);
- }
+ else
+ assert(0 == "bad C_PoolOp return value");
}
};
diff --git a/src/mon/PGMap.cc b/src/mon/PGMap.cc
index 02ae6e95324..ce896141875 100644
--- a/src/mon/PGMap.cc
+++ b/src/mon/PGMap.cc
@@ -244,7 +244,7 @@ void PGMap::apply_incremental(CephContext *cct, const Incremental& inc)
stamp_delta += delta_t;
pg_sum_delta.stats.add(d.stats);
- if (pg_sum_deltas.size() > (std::list< pair<pool_stat_t, utime_t> >::size_type)MIN(1, cct ? cct->_conf->mon_stat_smooth_intervals : 1)) {
+ if (pg_sum_deltas.size() > (std::list< pair<pool_stat_t, utime_t> >::size_type)MAX(1, cct ? cct->_conf->mon_stat_smooth_intervals : 1)) {
pg_sum_delta.stats.sub(pg_sum_deltas.front().first.stats);
stamp_delta -= pg_sum_deltas.front().second;
pg_sum_deltas.pop_front();
diff --git a/src/mon/PGMonitor.cc b/src/mon/PGMonitor.cc
index 7e9b83ba5e0..213aac44bae 100644
--- a/src/mon/PGMonitor.cc
+++ b/src/mon/PGMonitor.cc
@@ -167,7 +167,6 @@ void PGMonitor::update_from_paxos()
}
// walk through incrementals
- utime_t now(ceph_clock_now(g_ceph_context));
while (paxosv > pg_map.version) {
bufferlist bl;
bool success = paxos->read(pg_map.version+1, bl);
@@ -1346,7 +1345,7 @@ void PGMonitor::check_full_osd_health(list<pair<health_status_t,string> >& summa
const set<int>& s, const char *desc,
health_status_t sev) const
{
- if (s.size() > 0) {
+ if (!s.empty()) {
ostringstream ss;
ss << s.size() << " " << desc << " osd(s)";
summary.push_back(make_pair(sev, ss.str()));
diff --git a/src/mon/PGMonitor.h b/src/mon/PGMonitor.h
index c150e157b9d..0308e429d8d 100644
--- a/src/mon/PGMonitor.h
+++ b/src/mon/PGMonitor.h
@@ -76,9 +76,11 @@ private:
} else if (r == -ECANCELED) {
req->put();
ack->put();
- } else {
- ack->put();
+ } else if (r == -EAGAIN) {
pgmon->dispatch(req);
+ ack->put();
+ } else {
+ assert(0 == "bad C_Stats return value");
}
}
};
diff --git a/src/monmaptool.cc b/src/monmaptool.cc
index 5870e5f81ad..e15e42b2ca8 100644
--- a/src/monmaptool.cc
+++ b/src/monmaptool.cc
@@ -91,7 +91,7 @@ int main(int argc, const char **argv)
++i;
}
}
- if (args.size() < 1) {
+ if (args.empty()) {
cerr << me << ": must specify monmap filename" << std::endl;
usage();
}
@@ -147,7 +147,7 @@ int main(int argc, const char **argv)
// apply initial members
list<string> initial_members;
get_str_list(g_conf->mon_initial_members, initial_members);
- if (initial_members.size()) {
+ if (!initial_members.empty()) {
cout << "initial_members " << initial_members << ", filtering seed monmap" << std::endl;
set<entity_addr_t> removed;
monmap.set_initial_members(g_ceph_context, initial_members,
diff --git a/src/msg/Message.h b/src/msg/Message.h
index 5bdd4d463b6..5e2b4f58d3c 100644
--- a/src/msg/Message.h
+++ b/src/msg/Message.h
@@ -188,7 +188,7 @@ public:
}
Connection *get() {
- return (Connection *)RefCountedObject::get();
+ return static_cast<Connection *>(RefCountedObject::get());
}
void set_priv(RefCountedObject *o) {
@@ -329,7 +329,7 @@ public:
}
Message *get() {
- return (Message *)RefCountedObject::get();
+ return static_cast<Message *>(RefCountedObject::get());
}
protected:
diff --git a/src/msg/Messenger.h b/src/msg/Messenger.h
index b75e4420f66..2615623c41c 100644
--- a/src/msg/Messenger.h
+++ b/src/msg/Messenger.h
@@ -184,7 +184,7 @@ public:
*
* @param m The name to set.
*/
- void set_myname(const entity_name_t m) { my_inst.name = m; }
+ void set_myname(const entity_name_t& m) { my_inst.name = m; }
/**
* Set the unknown address components for this Messenger.
* This is useful if the Messenger doesn't know its full address just by
@@ -552,11 +552,10 @@ public:
p++)
if ((*p)->ms_dispatch(m))
return;
- std::ostringstream oss;
- oss << "ms_deliver_dispatch: fatal error: unhandled message "
- << m << " " << *m << " from " << m->get_source_inst();
- dout_emergency(oss.str());
- assert(0);
+ lsubdout(cct, ms, 0) << "ms_deliver_dispatch: unhandled message " << m << " " << *m << " from "
+ << m->get_source_inst() << dendl;
+ assert(!cct->_conf->ms_die_on_unhandled_msg);
+ m->put();
}
/**
* Notify each Dispatcher of a new Connection. Call
diff --git a/src/msg/msg_types.h b/src/msg/msg_types.h
index 8f3d74bb00c..e80639ead0b 100644
--- a/src/msg/msg_types.h
+++ b/src/msg/msg_types.h
@@ -142,7 +142,7 @@ inline std::ostream& operator<<(std::ostream& out, const ceph_entity_name& addr)
namespace __gnu_cxx {
template<> struct hash< entity_name_t >
{
- size_t operator()( const entity_name_t m ) const
+ size_t operator()( const entity_name_t &m ) const
{
return rjhash32(m.type() ^ m.num());
}
diff --git a/src/ocf/.gitignore b/src/ocf/.gitignore
new file mode 100644
index 00000000000..0d609338edf
--- /dev/null
+++ b/src/ocf/.gitignore
@@ -0,0 +1,2 @@
+/ceph
+/rbd
diff --git a/src/ocf/rbd.in b/src/ocf/rbd.in
index 041788d96fc..150ad6e6b21 100644
--- a/src/ocf/rbd.in
+++ b/src/ocf/rbd.in
@@ -134,7 +134,7 @@ find_rbd_dev() {
# Build the sed pattern, substituting "-" for the snapshot name if
# it's unset
- sedpat="[0-9]\+[ \t]\+${OCF_RESKEY_pool}[ \t]\+${OCF_RESKEY_name}[ \t]\+${OCF_RESKEY_snap:--}[ \t]\+\(/dev/rbd[0-9]\+\)"
+ sedpat="[0-9]\+[ \t]\+${OCF_RESKEY_pool}[ \t]\+${OCF_RESKEY_name}[ \t]\+${OCF_RESKEY_snap:--}[ \t]\+\(/dev/rbd[0-9]\+\).*"
# Run rbd showmapped, filter out the header line, then try to
# extract the device name
diff --git a/src/os/DBObjectMap.cc b/src/os/DBObjectMap.cc
index 10b7b705a4b..c3a4c3b9869 100644
--- a/src/os/DBObjectMap.cc
+++ b/src/os/DBObjectMap.cc
@@ -104,7 +104,7 @@ bool DBObjectMap::check(std::ostream &out)
map<string, bufferlist> got;
to_get.insert(HEADER_KEY);
db->get(sys_parent_prefix(header), to_get, &got);
- if (!got.size()) {
+ if (got.empty()) {
out << "Missing: seq " << header.parent << std::endl;
retval = false;
break;
@@ -242,8 +242,7 @@ bool DBObjectMap::parse_hobject_key_v0(const string &in, coll_t *c,
*c = coll_t(coll);
int64_t pool = -1;
pg_t pg;
- snapid_t pg_snap;
- if (c->is_pg(pg, pg_snap))
+ if (c->is_pg_prefix(pg))
pool = (int64_t)pg.pool();
(*hoid) = hobject_t(name, key, snap, hash, pool);
return true;
@@ -554,7 +553,7 @@ int DBObjectMap::_get_header(Header header,
set<string> to_get;
to_get.insert(USER_HEADER_KEY);
int r = db->get(sys_prefix(header), to_get, &out);
- if (r == 0 && out.size())
+ if (r == 0 && !out.empty())
break;
if (r < 0)
return r;
@@ -564,7 +563,7 @@ int DBObjectMap::_get_header(Header header,
header = lookup_parent(current);
}
- if (out.size())
+ if (!out.empty())
bl->swap(out.begin()->second);
return 0;
}
@@ -969,7 +968,7 @@ int DBObjectMap::upgrade()
&got);
if (r < 0)
return r;
- if (!got.size())
+ if (got.empty())
continue; // Moved in a previous transaction
t->rmkeys(USER_PREFIX + header_key(hdr.parent) + SYS_PREFIX,
@@ -1017,7 +1016,7 @@ int DBObjectMap::init(bool do_upgrade)
int r = db->get(SYS_PREFIX, to_get, &result);
if (r < 0)
return r;
- if (result.size()) {
+ if (!result.empty()) {
bufferlist::iterator bliter = result.begin()->second.begin();
state.decode(bliter);
if (state.v < 1) { // Needs upgrade
@@ -1081,7 +1080,7 @@ DBObjectMap::Header DBObjectMap::_lookup_map_header(const hobject_t &hoid)
int r = db->get(HOBJECT_TO_SEQ, to_get, &out);
if (r < 0)
return Header();
- if (!out.size())
+ if (out.empty())
return Header();
Header ret(new _Header(), RemoveMapHeaderOnDelete(this, hoid));
@@ -1124,7 +1123,7 @@ DBObjectMap::Header DBObjectMap::lookup_parent(Header input)
assert(0);
return Header();
}
- if (out.size() < 1) {
+ if (out.empty()) {
assert(0);
return Header();
}
diff --git a/src/os/FileStore.cc b/src/os/FileStore.cc
index 1bab9c3c36d..c91d47c6d0d 100644
--- a/src/os/FileStore.cc
+++ b/src/os/FileStore.cc
@@ -490,6 +490,7 @@ bool parse_attrname(char **name)
static int do_fiemap(int fd, off_t start, size_t len, struct fiemap **pfiemap)
{
struct fiemap *fiemap = NULL;
+ struct fiemap *_realloc_fiemap = NULL;
int size;
int ret;
@@ -509,11 +510,13 @@ static int do_fiemap(int fd, off_t start, size_t len, struct fiemap **pfiemap)
size = sizeof(struct fiemap_extent) * (fiemap->fm_mapped_extents);
- fiemap = (struct fiemap *)realloc(fiemap, sizeof(struct fiemap) +
+ _realloc_fiemap = (struct fiemap *)realloc(fiemap, sizeof(struct fiemap) +
size);
- if (!fiemap) {
+ if (!_realloc_fiemap) {
ret = -ENOMEM;
goto done_err;
+ } else {
+ fiemap = _realloc_fiemap;
}
memset(fiemap->fm_extents, 0, size);
@@ -1492,7 +1495,7 @@ int FileStore::mount()
}
dout(0) << "mount found snaps " << snaps << dendl;
- if (cluster_snaps.size())
+ if (!cluster_snaps.empty())
dout(0) << "mount found cluster snaps " << cluster_snaps << dendl;
}
@@ -2478,7 +2481,7 @@ unsigned FileStore::_do_transaction(Transaction& t, uint64_t op_seq, int trans_n
{
coll_t cid = i.get_cid();
if (_check_replay_guard(cid, spos) > 0)
- r = _create_collection(cid);
+ r = _create_collection(cid, spos);
}
break;
@@ -2593,6 +2596,15 @@ unsigned FileStore::_do_transaction(Transaction& t, uint64_t op_seq, int trans_n
uint32_t bits(i.get_u32());
uint32_t rem(i.get_u32());
coll_t dest(i.get_cid());
+ r = _split_collection_create(cid, bits, rem, dest, spos);
+ }
+ break;
+ case Transaction::OP_SPLIT_COLLECTION2:
+ {
+ coll_t cid(i.get_cid());
+ uint32_t bits(i.get_u32());
+ uint32_t rem(i.get_u32());
+ coll_t dest(i.get_cid());
r = _split_collection(cid, bits, rem, dest, spos);
}
break;
@@ -2607,7 +2619,8 @@ unsigned FileStore::_do_transaction(Transaction& t, uint64_t op_seq, int trans_n
if (r == -ENOENT && !(op == Transaction::OP_CLONERANGE ||
op == Transaction::OP_CLONE ||
- op == Transaction::OP_CLONERANGE2))
+ op == Transaction::OP_CLONERANGE2 ||
+ op == Transaction::OP_COLL_ADD))
// -ENOENT is normally okay
// ...including on a replayed OP_RMCOLL with !stable_commits
ok = true;
@@ -3738,7 +3751,7 @@ int FileStore::getattr(coll_t cid, const hobject_t& oid, const char *name, buffe
dout(10) << __func__ << " get_xattrs err r =" << r << dendl;
goto out;
}
- if (!got.size()) {
+ if (got.empty()) {
dout(10) << __func__ << " got.size() is 0" << dendl;
return -ENODATA;
}
@@ -4294,7 +4307,7 @@ bool FileStore::collection_empty(coll_t c)
assert(!m_filestore_fail_eio || r != -EIO);
return false;
}
- return ls.size() > 0;
+ return !ls.empty();
}
int FileStore::collection_list_range(coll_t c, hobject_t start, hobject_t end,
@@ -4315,11 +4328,11 @@ int FileStore::collection_list_range(coll_t c, hobject_t start, hobject_t end,
ls->insert(ls->end(), next_objects.begin(), next_objects.end());
// special case for empty collection
- if (ls->size() == 0) {
+ if (ls->empty()) {
break;
}
- while (ls->size() > 0 && ls->back() >= end) {
+ while (!ls->empty() && ls->back() >= end) {
ls->pop_back();
done = true;
}
@@ -4454,6 +4467,30 @@ ObjectMap::ObjectMapIterator FileStore::get_omap_iterator(coll_t c,
return object_map->get_iterator(hoid);
}
+// Create the directory for collection c (path produced by get_cdir()),
+// initialize its index, and then call _set_replay_guard(c, spos).
+//
+// Returns 0 on success, or the negative errno from mkdir() / the negative
+// result of init_index(). -EEXIST from mkdir() is treated as success only
+// while 'replaying' is set.
+int FileStore::_create_collection(
+ coll_t c,
+ const SequencerPosition &spos)
+{
+ char fn[PATH_MAX];
+ get_cdir(c, fn, sizeof(fn));
+ dout(15) << "create_collection " << fn << dendl;
+ int r = ::mkdir(fn, 0755);
+ if (r < 0)
+ r = -errno;
+ // an already-existing directory is only acceptable during replay
+ if (r == -EEXIST && replaying)
+ r = 0;
+ dout(10) << "create_collection " << fn << " = " << r << dendl;
+
+ if (r < 0)
+ return r;
+ r = init_index(c);
+ if (r < 0)
+ return r;
+ // guard is set only after both mkdir and init_index have succeeded
+ _set_replay_guard(c, spos);
+ return 0;
+}
+
+// DEPRECATED -- remove with _split_collection_create
int FileStore::_create_collection(coll_t c)
{
char fn[PATH_MAX];
@@ -4609,6 +4646,43 @@ int FileStore::_split_collection(coll_t cid,
const SequencerPosition &spos)
{
dout(15) << __func__ << " " << cid << " bits: " << bits << dendl;
+ int dstcmp = _check_replay_guard(dest, spos);
+ if (dstcmp < 0)
+ return 0;
+ if (dstcmp > 0 && !collection_empty(dest))
+ return -ENOTEMPTY;
+
+ int srccmp = _check_replay_guard(cid, spos);
+ if (srccmp < 0)
+ return 0;
+
+ _set_replay_guard(cid, spos, true);
+ _set_replay_guard(dest, spos, true);
+
+ Index from;
+ int r = get_index(cid, &from);
+
+ Index to;
+ if (!r)
+ r = get_index(dest, &to);
+
+ if (!r)
+ r = from->split(rem, bits, to);
+
+ _close_replay_guard(cid, spos);
+ _close_replay_guard(dest, spos);
+ return r;
+}
+
+// DEPRECATED: remove once we are sure there won't be any such transactions
+// replayed
+int FileStore::_split_collection_create(coll_t cid,
+ uint32_t bits,
+ uint32_t rem,
+ coll_t dest,
+ const SequencerPosition &spos)
+{
+ dout(15) << __func__ << " " << cid << " bits: " << bits << dendl;
int r = _create_collection(dest);
if (r < 0 && !(r == -EEXIST && replaying))
return r;
diff --git a/src/os/FileStore.h b/src/os/FileStore.h
index b781de2b432..3336e59378e 100644
--- a/src/os/FileStore.h
+++ b/src/os/FileStore.h
@@ -453,6 +453,7 @@ public:
ObjectMap::ObjectMapIterator get_omap_iterator(coll_t c, const hobject_t &hoid);
int _create_collection(coll_t c);
+ int _create_collection(coll_t c, const SequencerPosition &spos);
int _destroy_collection(coll_t c);
int _collection_add(coll_t c, coll_t ocid, const hobject_t& o,
const SequencerPosition& spos);
@@ -475,6 +476,9 @@ private:
const SequencerPosition &spos);
int _split_collection(coll_t cid, uint32_t bits, uint32_t rem, coll_t dest,
const SequencerPosition &spos);
+ int _split_collection_create(coll_t cid, uint32_t bits, uint32_t rem,
+ coll_t dest,
+ const SequencerPosition &spos);
virtual const char** get_tracked_conf_keys() const;
virtual void handle_conf_change(const struct md_config_t *conf,
diff --git a/src/os/HashIndex.cc b/src/os/HashIndex.cc
index d0d155c8d18..a1d369d8e50 100644
--- a/src/os/HashIndex.cc
+++ b/src/os/HashIndex.cc
@@ -346,7 +346,7 @@ int HashIndex::recursive_remove(const vector<string> &path) {
r = list_objects(path, 0, 0, &objects);
if (r < 0)
return r;
- if (objects.size())
+ if (!objects.empty())
return -ENOTEMPTY;
vector<string> subdir(path);
for (set<string>::iterator i = subdirs.begin();
diff --git a/src/os/LFNIndex.cc b/src/os/LFNIndex.cc
index 5e505638d15..412100fe604 100644
--- a/src/os/LFNIndex.cc
+++ b/src/os/LFNIndex.cc
@@ -235,7 +235,7 @@ int LFNIndex::remove_objects(const vector<string> &dir,
candidate->second.second));
candidate++;
}
- if (holes.size() > 0)
+ if (!holes.empty())
clean_chains.insert(lfn_get_short_name(to_clean->second, 0));
}
return 0;
@@ -893,8 +893,7 @@ bool LFNIndex::lfn_parse_object_name_keyless(const string &long_name, hobject_t
bool r = parse_object(long_name.c_str(), *out);
int64_t pool = -1;
pg_t pg;
- snapid_t snap;
- if (coll().is_pg(pg, snap))
+ if (coll().is_pg_prefix(pg))
pool = (int64_t)pg.pool();
out->pool = pool;
if (!r) return r;
@@ -985,8 +984,7 @@ bool LFNIndex::lfn_parse_object_name_poolless(const string &long_name,
int64_t pool = -1;
pg_t pg;
- snapid_t pg_snap;
- if (coll().is_pg(pg, pg_snap))
+ if (coll().is_pg_prefix(pg))
pool = (int64_t)pg.pool();
(*out) = hobject_t(name, key, snap, hash, pool);
return true;
diff --git a/src/os/ObjectStore.cc b/src/os/ObjectStore.cc
index 70e8b6ed19e..813356f33ed 100644
--- a/src/os/ObjectStore.cc
+++ b/src/os/ObjectStore.cc
@@ -419,6 +419,22 @@ void ObjectStore::Transaction::dump(ceph::Formatter *f)
uint32_t bits(i.get_u32());
uint32_t rem(i.get_u32());
coll_t dest(i.get_cid());
+ f->dump_string("op_name", "op_split_collection_create");
+ f->dump_stream("collection") << cid;
+ f->dump_stream("bits") << bits;
+ f->dump_stream("rem") << rem;
+ f->dump_stream("dest") << dest;
+ }
+ // each case consumes its own operands from the iterator; stop here so
+ // the OP_SPLIT_COLLECTION2 decoding below is not also run for this op
+ break;
+
+ case Transaction::OP_SPLIT_COLLECTION2:
+ {
+ coll_t cid(i.get_cid());
+ uint32_t bits(i.get_u32());
+ uint32_t rem(i.get_u32());
+ coll_t dest(i.get_cid());
f->dump_string("op_name", "op_split_collection");
f->dump_stream("collection") << cid;
f->dump_stream("bits") << bits;
diff --git a/src/os/ObjectStore.h b/src/os/ObjectStore.h
index 504422981f4..e88a67fe66b 100644
--- a/src/os/ObjectStore.h
+++ b/src/os/ObjectStore.h
@@ -153,6 +153,8 @@ public:
OP_OMAP_RMKEYS = 33, // cid, keyset
OP_OMAP_SETHEADER = 34, // cid, header
OP_SPLIT_COLLECTION = 35, // cid, bits, destination
+ OP_SPLIT_COLLECTION2 = 36, /* cid, bits, destination
+ doesn't create the destination */
};
private:
@@ -555,7 +557,7 @@ public:
uint32_t bits,
uint32_t rem,
coll_t destination) {
- __u32 op = OP_SPLIT_COLLECTION;
+ __u32 op = OP_SPLIT_COLLECTION2;
::encode(op, tbl);
::encode(cid, tbl);
::encode(bits, tbl);
diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc
index 09fb58400e9..a1546bc606d 100644
--- a/src/osd/OSD.cc
+++ b/src/osd/OSD.cc
@@ -139,6 +139,8 @@ static CompatSet get_osd_compat_set() {
ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_CATEGORIES);
ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_HOBJECTPOOL);
ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_BIGINFO);
+ ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_LEVELDBINFO);
+ ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_LEVELDBLOG);
return CompatSet(ceph_osd_feature_compat, ceph_osd_feature_ro_compat,
ceph_osd_feature_incompat);
}
@@ -147,6 +149,7 @@ OSDService::OSDService(OSD *osd) :
osd(osd),
whoami(osd->whoami), store(osd->store), clog(osd->clog),
pg_recovery_stats(osd->pg_recovery_stats),
+ infos_oid(sobject_t("infos", CEPH_NOSNAP)),
cluster_messenger(osd->cluster_messenger),
client_messenger(osd->client_messenger),
logger(osd->logger),
@@ -922,6 +925,17 @@ int OSD::init()
delete store;
return -EINVAL;
}
+
+ // make sure info object exists
+ if (!store->exists(coll_t::META_COLL, service.infos_oid)) {
+ dout(10) << "init creating/touching infos object" << dendl;
+ ObjectStore::Transaction t;
+ t.touch(coll_t::META_COLL, service.infos_oid);
+ r = store->apply_transaction(t);
+ if (r < 0)
+ return r;
+ }
+
if (osd_compat.compare(superblock.compat_features) != 0) {
// We need to persist the new compat_set before we
// do anything else
@@ -1587,13 +1601,15 @@ void OSD::load_pgs()
dout(10) << "pgid " << pgid << " coll " << coll_t(pgid) << dendl;
bufferlist bl;
- epoch_t map_epoch = PG::peek_map_epoch(store, coll_t(pgid), &bl);
+ epoch_t map_epoch = PG::peek_map_epoch(store, coll_t(pgid), service.infos_oid, &bl);
PG *pg = _open_lock_pg(map_epoch == 0 ? osdmap : service.get_map(map_epoch), pgid);
// read pg state, log
pg->read_state(store, bl);
+ pg->check_ondisk_snap_colls(i->second);
+
set<pg_t> split_pgs;
if (osdmap->have_pg_pool(pg->info.pgid.pool()) &&
pg->info.pgid.is_split(pg->get_osdmap()->get_pg_num(pg->info.pgid.pool()),
@@ -1616,7 +1632,7 @@ void OSD::load_pgs()
pg->unlock();
}
dout(10) << "load_pgs done" << dendl;
-
+
build_past_intervals_parallel();
}
@@ -1727,7 +1743,9 @@ void OSD::build_past_intervals_parallel()
int num = 0;
for (map<PG*,pistate>::iterator i = pis.begin(); i != pis.end(); ++i) {
PG *pg = i->first;
- pg->write_info(t);
+ pg->dirty_big_info = true;
+ pg->dirty_info = true;
+ pg->write_if_dirty(t);
// don't let the transaction get too big
if (++num >= g_conf->osd_target_transaction_size) {
@@ -1848,7 +1866,7 @@ void OSD::calc_priors_during(pg_t pgid, epoch_t start, epoch_t end, set<int>& ps
pset.insert(acting[i]);
up++;
}
- if (!up && acting.size()) {
+ if (!up && !acting.empty()) {
// sucky. add down osds, even tho we can't reach them right now.
for (unsigned i=0; i<acting.size(); i++)
if (acting[i] != whoami)
@@ -3074,7 +3092,7 @@ void OSD::do_command(Connection *con, tid_t tid, vector<string>& cmd, bufferlist
dout(20) << "do_command tid " << tid << " " << cmd << dendl;
- if (cmd.size() == 0) {
+ if (cmd.empty()) {
ss << "no command given";
goto out;
}
@@ -3154,7 +3172,7 @@ void OSD::do_command(Connection *con, tid_t tid, vector<string>& cmd, bufferlist
<< (end-start) << " sec at " << prettybyte_t(rate) << "/sec";
}
- else if (cmd.size() >= 1 && cmd[0] == "flush_pg_stats") {
+ else if (!cmd.empty() && cmd[0] == "flush_pg_stats") {
flush_pg_stats();
}
@@ -4734,6 +4752,8 @@ void OSD::split_pgs(
dout(10) << "m_seed " << i->ps() << dendl;
dout(10) << "split_bits is " << split_bits << dendl;
+ rctx->transaction->create_collection(
+ coll_t(*i));
rctx->transaction->split_collection(
coll_t(parent->info.pgid),
split_bits,
@@ -4744,6 +4764,8 @@ void OSD::split_pgs(
++k) {
for (snapid_t j = k.get_start(); j < k.get_start() + k.get_len();
++j) {
+ rctx->transaction->create_collection(
+ coll_t(*i, j));
rctx->transaction->split_collection(
coll_t(parent->info.pgid, j),
split_bits,
@@ -4844,7 +4866,7 @@ void OSD::split_pg(PG *parent, map<pg_t,PG*>& children, ObjectStore::Transaction
object_info_t oi(bv);
t.collection_move(coll_t(pgid), coll_t(parentid), poid);
- if (oi.snaps.size()) {
+ if (!oi.snaps.empty()) {
snapid_t first = oi.snaps[0];
t.collection_move(coll_t(pgid, first), coll_t(parentid), poid);
if (oi.snaps.size() > 1) {
@@ -5412,7 +5434,8 @@ void OSD::handle_pg_trim(OpRequestRef op)
// primary is instructing us to trim
ObjectStore::Transaction *t = new ObjectStore::Transaction;
pg->trim(*t, m->trim_to);
- pg->write_info(*t);
+ pg->dirty_info = true;
+ pg->write_if_dirty(*t);
int tr = store->queue_transaction(pg->osr.get(), t,
new ObjectStore::C_DeleteTransaction(t));
assert(tr == 0);
@@ -5881,7 +5904,7 @@ void OSD::do_recovery(PG *pg)
*/
if (!started && pg->have_unfound()) {
pg->discover_all_missing(*rctx.query_map);
- if (!rctx.query_map->size()) {
+ if (rctx.query_map->empty()) {
dout(10) << "do_recovery no luck, giving up on this pg for now" << dendl;
recovery_wq.lock();
recovery_wq._dequeue(pg);
@@ -6380,7 +6403,7 @@ void OSD::process_peering_events(
same_interval_since = MAX(pg->info.history.same_interval_since,
same_interval_since);
pg->write_if_dirty(*rctx.transaction);
- if (split_pgs.size()) {
+ if (!split_pgs.empty()) {
rctx.on_applied->add(new C_CompleteSplits(this, split_pgs));
split_pgs.clear();
}
diff --git a/src/osd/OSD.h b/src/osd/OSD.h
index b411c177a36..5680acca178 100644
--- a/src/osd/OSD.h
+++ b/src/osd/OSD.h
@@ -31,7 +31,6 @@
#include "os/ObjectStore.h"
#include "OSDCap.h"
-#include "common/DecayCounter.h"
#include "osd/ClassHandler.h"
#include "include/CompatSet.h"
@@ -170,6 +169,7 @@ public:
ObjectStore *&store;
LogClient &clog;
PGRecoveryStats &pg_recovery_stats;
+ hobject_t infos_oid;
private:
Messenger *&cluster_messenger;
Messenger *&client_messenger;
@@ -262,7 +262,7 @@ public:
}
bool first_scrub_stamp(pair<utime_t, pg_t> *out) {
Mutex::Locker l(sched_scrub_lock);
- if (last_scrub_pg.size() == 0)
+ if (last_scrub_pg.empty())
return false;
set< pair<utime_t, pg_t> >::iterator iter = last_scrub_pg.begin();
*out = *iter;
@@ -271,7 +271,7 @@ public:
bool next_scrub_stamp(pair<utime_t, pg_t> next,
pair<utime_t, pg_t> *out) {
Mutex::Locker l(sched_scrub_lock);
- if (last_scrub_pg.size() == 0)
+ if (last_scrub_pg.empty())
return false;
set< pair<utime_t, pg_t> >::iterator iter = last_scrub_pg.lower_bound(next);
if (iter == last_scrub_pg.end())
diff --git a/src/osd/OSDMap.cc b/src/osd/OSDMap.cc
index c7d044ac6fd..6b692d407a8 100644
--- a/src/osd/OSDMap.cc
+++ b/src/osd/OSDMap.cc
@@ -1654,6 +1654,15 @@ void OSDMap::print_summary(ostream& out) const
out << " nearfull";
}
+bool OSDMap::crush_ruleset_in_use(int ruleset) const // true iff some entry in `pools` has crush_ruleset == ruleset
+{
+ for (map<int64_t,pg_pool_t>::const_iterator p = pools.begin(); p != pools.end(); ++p) {
+ if (p->second.crush_ruleset == ruleset)
+ return true; // first matching pool is enough
+ }
+ return false; // no pool references this ruleset (also the result when `pools` is empty)
+}
+
void OSDMap::build_simple(CephContext *cct, epoch_t e, uuid_d &fsid,
int nosd, int pg_bits, int pgp_bits)
{
diff --git a/src/osd/OSDMap.h b/src/osd/OSDMap.h
index d161fa7436b..70ec263e4d8 100644
--- a/src/osd/OSDMap.h
+++ b/src/osd/OSDMap.h
@@ -553,6 +553,7 @@ public:
static void build_simple_crush_map_from_conf(CephContext *cct, CrushWrapper& crush,
map<int, const char*>& rulesets);
+ bool crush_ruleset_in_use(int ruleset) const;
private:
void print_osd_line(int cur, ostream *out, Formatter *f) const;
diff --git a/src/osd/PG.cc b/src/osd/PG.cc
index bd66e4fe092..bc6e39bdb96 100644
--- a/src/osd/PG.cc
+++ b/src/osd/PG.cc
@@ -63,8 +63,10 @@ PG::PG(OSDService *o, OSDMapRef curmap,
const hobject_t& ioid) :
osd(o), osdmap_ref(curmap), pool(_pool),
_lock("PG::_lock"),
- ref(0), deleting(false), dirty_info(false), dirty_log(false),
- info(p), coll(p), log_oid(loid), biginfo_oid(ioid),
+ ref(0), deleting(false), dirty_info(false), dirty_big_info(false), dirty_log(false),
+ info(p),
+ info_struct_v(0),
+ coll(p), log_oid(loid), biginfo_oid(ioid),
recovery_item(this), scrub_item(this), scrub_finalize_item(this), snap_trim_item(this), stat_queue_item(this),
recovery_ops_active(0),
waiting_on_backfill(0),
@@ -97,6 +99,7 @@ void PG::lock(bool no_lockdep)
_lock.Lock(no_lockdep);
// if we have unrecorded dirty state with the lock dropped, there is a bug
assert(!dirty_info);
+ assert(!dirty_big_info);
assert(!dirty_log);
dout(30) << "lock" << dendl;
@@ -107,6 +110,7 @@ void PG::lock_with_map_lock_held(bool no_lockdep)
_lock.Lock(no_lockdep);
// if we have unrecorded dirty state with the lock dropped, there is a bug
assert(!dirty_info);
+ assert(!dirty_big_info);
assert(!dirty_log);
dout(30) << "lock_with_map_lock_held" << dendl;
@@ -138,7 +142,7 @@ std::string PG::gen_prefix() const
-void PG::IndexedLog::trim(ObjectStore::Transaction& t, eversion_t s)
+void PG::IndexedLog::trim(ObjectStore::Transaction& t, hobject_t& log_oid, eversion_t s)
{
if (complete_to != log.end() &&
complete_to->version <= s) {
@@ -146,14 +150,17 @@ void PG::IndexedLog::trim(ObjectStore::Transaction& t, eversion_t s)
<< " on " << *this << dendl;
}
+ set<string> keys_to_rm;
while (!log.empty()) {
pg_log_entry_t &e = *log.begin();
if (e.version > s)
break;
generic_dout(20) << "trim " << e << dendl;
unindex(e); // remove from index,
+ keys_to_rm.insert(e.get_key_name());
log.pop_front(); // from log
}
+ t.omap_rmkeys(coll_t::META_COLL, log_oid, keys_to_rm);
// raise tail?
if (tail < s)
@@ -462,6 +469,7 @@ void PG::rewind_divergent_log(ObjectStore::Transaction& t, eversion_t newhead)
merge_old_entry(t, *d);
dirty_info = true;
+ dirty_big_info = true;
dirty_log = true;
}
@@ -597,6 +605,7 @@ void PG::merge_log(ObjectStore::Transaction& t,
if (changed) {
dirty_info = true;
+ dirty_big_info = true;
dirty_log = true;
}
}
@@ -881,6 +890,7 @@ void PG::generate_past_intervals()
// record our work.
dirty_info = true;
+ dirty_big_info = true;
}
/*
@@ -897,6 +907,7 @@ void PG::trim_past_intervals()
return;
dout(10) << __func__ << ": trimming " << pif->second << dendl;
past_intervals.erase(pif++);
+ dirty_big_info = true;
}
}
@@ -1409,6 +1420,7 @@ void PG::activate(ObjectStore::Transaction& t,
// write pg info, log
dirty_info = true;
+ dirty_big_info = true; // maybe
dirty_log = true;
// clean up stray objects
@@ -1760,7 +1772,8 @@ void PG::_activate_committed(epoch_t e)
if (dirty_info) {
ObjectStore::Transaction *t = new ObjectStore::Transaction;
- write_info(*t);
+ dirty_info = true;
+ write_if_dirty(*t);
int tr = osd->store->queue_transaction(osr.get(), t);
assert(tr == 0);
}
@@ -2061,8 +2074,10 @@ void PG::split_into(pg_t child_pgid, PG *child, unsigned split_bits)
_split_into(child_pgid, child, split_bits);
child->dirty_info = true;
+ child->dirty_big_info = true;
child->dirty_log = true;
dirty_info = true;
+ dirty_big_info = true;
dirty_log = true;
}
@@ -2307,34 +2322,57 @@ void PG::init(int role, vector<int>& newup, vector<int>& newacting, pg_history_t
reg_next_scrub();
- write_info(*t);
- write_log(*t);
+ dirty_info = true;
+ dirty_big_info = true;
+ dirty_log = true;
+ write_if_dirty(*t);
}
void PG::write_info(ObjectStore::Transaction& t)
{
// pg state
- bufferlist infobl;
- __u8 struct_v = 5;
- ::encode(struct_v, infobl);
- ::encode(get_osdmap()->get_epoch(), infobl);
- t.collection_setattr(coll, "info", infobl);
-
- // potentially big stuff
- bufferlist bigbl;
- ::encode(past_intervals, bigbl);
- ::encode(snap_collections, bigbl);
- ::encode(info, bigbl);
- dout(20) << "write_info bigbl " << bigbl.length() << dendl;
- t.truncate(coll_t::META_COLL, biginfo_oid, 0);
- t.write(coll_t::META_COLL, biginfo_oid, 0, bigbl.length(), bigbl);
+ __u8 cur_struct_v = 6;
+
+ assert(info_struct_v <= cur_struct_v);
+
+ // Only need to write struct_v to attr when upgrading
+ if (info_struct_v < cur_struct_v) {
+ bufferlist attrbl;
+ info_struct_v = cur_struct_v;
+ ::encode(info_struct_v, attrbl);
+ t.collection_setattr(coll, "info", attrbl);
+ }
+
+ // info. store purged_snaps separately.
+ interval_set<snapid_t> purged_snaps;
+ map<string,bufferlist> v;
+ ::encode(get_osdmap()->get_epoch(), v[get_epoch_key(info.pgid)]);
+ purged_snaps.swap(info.purged_snaps);
+ ::encode(info, v[get_info_key(info.pgid)]);
+ purged_snaps.swap(info.purged_snaps);
+
+ if (dirty_big_info) {
+ // potentially big stuff
+ bufferlist& bigbl = v[get_biginfo_key(info.pgid)];
+ ::encode(past_intervals, bigbl);
+ ::encode(snap_collections, bigbl);
+ ::encode(info.purged_snaps, bigbl);
+ dout(20) << "write_info bigbl " << bigbl.length() << dendl;
+ }
+
+ t.omap_setkeys(coll_t::META_COLL, osd->infos_oid, v);
dirty_info = false;
+ dirty_big_info = false;
}
-epoch_t PG::peek_map_epoch(ObjectStore *store, coll_t coll, bufferlist *bl)
+epoch_t PG::peek_map_epoch(ObjectStore *store, coll_t coll, hobject_t &infos_oid, bufferlist *bl)
{
assert(bl);
+ pg_t pgid;
+ snapid_t snap;
+ bool ok = coll.is_pg(pgid, snap);
+ assert(ok);
store->collection_getattr(coll, "info", *bl);
bufferlist::iterator bp = bl->begin();
__u8 struct_v = 0;
@@ -2342,50 +2380,49 @@ epoch_t PG::peek_map_epoch(ObjectStore *store, coll_t coll, bufferlist *bl)
if (struct_v < 5)
return 0;
epoch_t cur_epoch = 0;
- ::decode(cur_epoch, bp);
+ if (struct_v < 6) {
+ ::decode(cur_epoch, bp);
+ } else {
+ // get epoch out of leveldb
+ bufferlist tmpbl;
+ string ek = get_epoch_key(pgid);
+ set<string> keys;
+ keys.insert(get_epoch_key(pgid));
+ map<string,bufferlist> values;
+ store->omap_get_values(coll_t::META_COLL, infos_oid, keys, &values);
+ assert(values.size() == 1);
+ tmpbl = values[ek];
+ bufferlist::iterator p = tmpbl.begin();
+ ::decode(cur_epoch, p);
+ }
return cur_epoch;
}
void PG::write_log(ObjectStore::Transaction& t)
{
dout(10) << "write_log" << dendl;
-
- // assemble buffer
- bufferlist bl;
-
- // build buffer
- ondisklog.tail = 0;
+ t.remove(coll_t::META_COLL, log_oid);
+ t.touch(coll_t::META_COLL, log_oid);
+ map<string,bufferlist> keys;
for (list<pg_log_entry_t>::iterator p = log.log.begin();
p != log.log.end();
p++) {
- uint64_t startoff = bl.length();
-
- bufferlist ebl(sizeof(*p)*2);
- ::encode(*p, ebl);
- __u32 crc = ebl.crc32c(0);
- ::encode(ebl, bl);
- ::encode(crc, bl);
-
- p->offset = startoff;
+ bufferlist bl(sizeof(*p) * 2);
+ p->encode_with_checksum(bl);
+ keys[p->get_key_name()].claim(bl);
}
- ondisklog.head = bl.length();
- ondisklog.has_checksums = true;
+ dout(10) << "write_log " << keys.size() << " keys" << dendl;
- // write it
- t.remove(coll_t::META_COLL, log_oid );
- t.write(coll_t::META_COLL, log_oid , 0, bl.length(), bl);
+ ::encode(ondisklog.divergent_priors, keys["divergent_priors"]);
+
+ t.omap_setkeys(coll_t::META_COLL, log_oid, keys);
- bufferlist blb(sizeof(ondisklog));
- ::encode(ondisklog, blb);
- t.collection_setattr(coll, "ondisklog", blb);
-
- dout(10) << "write_log to " << ondisklog.tail << "~" << ondisklog.length() << dendl;
dirty_log = false;
}
void PG::write_if_dirty(ObjectStore::Transaction& t)
{
- if (dirty_info)
+ if (dirty_big_info || dirty_info)
write_info(t);
if (dirty_log)
write_log(t);
@@ -2403,45 +2440,9 @@ void PG::trim(ObjectStore::Transaction& t, eversion_t trim_to)
assert(trim_to <= info.last_complete);
dout(10) << "trim " << log << " to " << trim_to << dendl;
- log.trim(t, trim_to);
+ log.trim(t, log_oid, trim_to);
info.log_tail = log.tail;
- trim_ondisklog(t);
- }
-}
-
-void PG::trim_ondisklog(ObjectStore::Transaction& t)
-{
- uint64_t new_tail;
- if (log.empty()) {
- new_tail = ondisklog.head;
- } else {
- new_tail = log.log.front().offset;
- }
- bool same_block = (new_tail & ~4095) == (ondisklog.tail & ~4095);
- dout(15) << "trim_ondisklog tail " << ondisklog.tail << " -> " << new_tail
- << ", now " << new_tail << "~" << (ondisklog.head - new_tail)
- << " " << (same_block ? "(same block)" : "(different block)")
- << dendl;
- assert(new_tail >= ondisklog.tail);
-
- if (same_block)
- return;
-
- ondisklog.tail = new_tail;
-
- if (!g_conf->osd_preserve_trimmed_log) {
- uint64_t zt = new_tail & ~4095;
- if (zt > ondisklog.zero_to) {
- t.zero(coll_t::META_COLL, log_oid, ondisklog.zero_to, zt - ondisklog.zero_to);
- dout(15) << "trim_ondisklog zeroing from " << ondisklog.zero_to
- << " to " << zt << dendl;
- ondisklog.zero_to = zt;
- }
}
-
- bufferlist blb(sizeof(ondisklog));
- ::encode(ondisklog, blb);
- t.collection_setattr(coll, "ondisklog", blb);
}
void PG::trim_peers()
@@ -2469,46 +2470,33 @@ void PG::add_log_entry(pg_log_entry_t& e, bufferlist& log_bl)
// log mutation
log.add(e);
- if (ondisklog.has_checksums) {
- bufferlist ebl(sizeof(e)*2);
- ::encode(e, ebl);
- __u32 crc = ebl.crc32c(0);
- ::encode(ebl, log_bl);
- ::encode(crc, log_bl);
- } else {
- ::encode(e, log_bl);
- }
dout(10) << "add_log_entry " << e << dendl;
+
+ e.encode_with_checksum(log_bl);
}
-void PG::append_log(vector<pg_log_entry_t>& logv, eversion_t trim_to, ObjectStore::Transaction &t)
+void PG::append_log(
+ vector<pg_log_entry_t>& logv, eversion_t trim_to, ObjectStore::Transaction &t)
{
dout(10) << "append_log " << log << " " << logv << dendl;
- bufferlist bl;
+ map<string,bufferlist> keys;
for (vector<pg_log_entry_t>::iterator p = logv.begin();
p != logv.end();
p++) {
- p->offset = ondisklog.head + bl.length();
- add_log_entry(*p, bl);
+ p->offset = 0;
+ add_log_entry(*p, keys[p->get_key_name()]);
}
- dout(10) << "append_log " << ondisklog.tail << "~" << ondisklog.length()
- << " adding " << bl.length() << dendl;
-
- t.write(coll_t::META_COLL, log_oid, ondisklog.head, bl.length(), bl );
- ondisklog.head += bl.length();
-
- bufferlist blb(sizeof(ondisklog));
- ::encode(ondisklog, blb);
- t.collection_setattr(coll, "ondisklog", blb);
- dout(10) << "append_log now " << ondisklog.tail << "~" << ondisklog.length() << dendl;
+ dout(10) << "append_log adding " << keys.size() << " keys" << dendl;
+ t.omap_setkeys(coll_t::META_COLL, log_oid, keys);
trim(t, trim_to);
// update the local pg, pg log
- write_info(t);
+ dirty_info = true;
+ write_if_dirty(t);
}
bool PG::check_log_for_corruption(ObjectStore *store)
@@ -2595,12 +2583,14 @@ std::string PG::get_corrupt_pg_log_name() const
return buf;
}
-int PG::read_info(ObjectStore *store, const coll_t coll, bufferlist &bl,
+int PG::read_info(
+ ObjectStore *store, const coll_t coll, bufferlist &bl,
pg_info_t &info, map<epoch_t,pg_interval_t> &past_intervals,
- hobject_t &biginfo_oid, interval_set<snapid_t> &snap_collections)
+ hobject_t &biginfo_oid, hobject_t &infos_oid,
+ interval_set<snapid_t> &snap_collections, __u8 &struct_v)
{
bufferlist::iterator p = bl.begin();
- __u8 struct_v;
+ bufferlist lbl;
// info
::decode(struct_v, p);
@@ -2610,17 +2600,34 @@ int PG::read_info(ObjectStore *store, const coll_t coll, bufferlist &bl,
::decode(past_intervals, p);
// snap_collections
- bl.clear();
- store->collection_getattr(coll, "snap_collections", bl);
- p = bl.begin();
+ store->collection_getattr(coll, "snap_collections", lbl);
+ p = lbl.begin();
::decode(struct_v, p);
} else {
- bl.clear();
- int r = store->read(coll_t::META_COLL, biginfo_oid, 0, 0, bl);
- if (r < 0)
- return r;
- p = bl.begin();
- ::decode(past_intervals, p);
+ if (struct_v < 6) {
+ int r = store->read(coll_t::META_COLL, biginfo_oid, 0, 0, lbl);
+ if (r < 0)
+ return r;
+ p = lbl.begin();
+ ::decode(past_intervals, p);
+ } else {
+ // get info out of leveldb
+ string k = get_info_key(info.pgid);
+ string bk = get_biginfo_key(info.pgid);
+ set<string> keys;
+ keys.insert(k);
+ keys.insert(bk);
+ map<string,bufferlist> values;
+ store->omap_get_values(coll_t::META_COLL, infos_oid, keys, &values);
+ assert(values.size() == 2);
+ lbl = values[k];
+ p = lbl.begin();
+ ::decode(info, p);
+
+ lbl = values[bk];
+ p = lbl.begin();
+ ::decode(past_intervals, p);
+ }
}
if (struct_v < 3) {
@@ -2634,8 +2641,10 @@ int PG::read_info(ObjectStore *store, const coll_t coll, bufferlist &bl,
}
} else {
::decode(snap_collections, p);
- if (struct_v >= 4)
+ if (struct_v >= 4 && struct_v < 6)
::decode(info, p);
+ else if (struct_v >= 6)
+ ::decode(info.purged_snaps, p);
}
return 0;
}
@@ -2643,41 +2652,23 @@ int PG::read_info(ObjectStore *store, const coll_t coll, bufferlist &bl,
void PG::read_state(ObjectStore *store, bufferlist &bl)
{
int r = read_info(store, coll, bl, info, past_intervals, biginfo_oid,
- snap_collections);
+ osd->infos_oid, snap_collections, info_struct_v);
assert(r >= 0);
- try {
- ostringstream oss;
- read_log(store, coll, log_oid, info, ondisklog, log, missing, oss, this);
- if (oss.str().length())
- osd->clog.error() << oss;
- }
- catch (const buffer::error &e) {
- string cr_log_coll_name(get_corrupt_pg_log_name());
- dout(0) << "Got exception '" << e.what() << "' while reading log. "
- << "Moving corrupted log file to '" << cr_log_coll_name
- << "' for later " << "analysis." << dendl;
-
- ondisklog.zero();
-
- // clear log index
- log.head = log.tail = info.last_update;
-
- // reset info
- info.log_tail = info.last_update;
-
- // Move the corrupt log to a new place and create a new zero-length log entry.
+ ostringstream oss;
+ if (read_log(
+ store, coll, log_oid, info,
+ ondisklog, log, missing, oss, this)) {
+ /* We don't want to leave the old format around in case the next log
+ * write happens to be an append_log()
+ */
ObjectStore::Transaction t;
- coll_t cr_log_coll(cr_log_coll_name);
- t.create_collection(cr_log_coll);
- t.collection_move(cr_log_coll, coll_t::META_COLL, log_oid);
- t.touch(coll_t::META_COLL, log_oid);
- write_info(t);
- store->apply_transaction(t);
-
- info.last_backfill = hobject_t();
- info.stats.stats.clear();
+ write_log(t);
+ int r = osd->store->apply_transaction(t);
+ assert(!r);
}
+ if (oss.str().length())
+ osd->clog.error() << oss;
// log any weirdness
log_weirdness();
@@ -2716,6 +2707,13 @@ void PG::log_weirdness()
<< " last_complete " << info.last_complete
<< " < log.tail " << log.tail
<< "\n";
+
+ if (log.caller_ops.size() > log.log.size()) {
+ osd->clog.error() << info.pgid
+ << " caller_ops.size " << log.caller_ops.size()
+ << " > log size " << log.log.size()
+ << "\n";
+ }
}
coll_t PG::make_snap_collection(ObjectStore::Transaction& t, snapid_t s)
@@ -2723,8 +2721,10 @@ coll_t PG::make_snap_collection(ObjectStore::Transaction& t, snapid_t s)
coll_t c(info.pgid, s);
if (!snap_collections.contains(s)) {
snap_collections.insert(s);
- write_info(t);
- dout(10) << "create_snap_collection " << c << ", set now " << snap_collections << dendl;
+ dirty_big_info = true;
+ write_if_dirty(t);
+ dout(10) << "create_snap_collection " << c << ", set now "
+ << snap_collections << dendl;
t.create_collection(c);
}
return c;
@@ -2744,7 +2744,7 @@ void PG::update_snap_collections(vector<pg_log_entry_t> &log_entries,
} catch (...) {
snaps.clear();
}
- if (snaps.size()) {
+ if (!snaps.empty()) {
make_snap_collection(t, snaps[0]);
if (snaps.size() > 1)
make_snap_collection(t, *(snaps.rbegin()));
@@ -3131,6 +3131,20 @@ void PG::sub_op_scrub_stop(OpRequestRef op)
osd->send_message_osd_cluster(reply, m->get_connection());
}
+
+void PG::check_ondisk_snap_colls( // compare the caller-supplied snap collection set with snap_collections; on mismatch, report and adopt the supplied set
+ const interval_set<snapid_t> &ondisk_snapcolls)
+{
+ if (!(ondisk_snapcolls == snap_collections)) {
+ derr << "ondisk_snapcolls: " << ondisk_snapcolls
+ << " does not match snap_collections " << snap_collections
+ << " repairing." << dendl;
+ osd->clog.error() << info.pgid << " ondisk snapcolls " << ondisk_snapcolls << " != snap_collections "
+ << snap_collections << ", repairing.";
+ snap_collections = ondisk_snapcolls; // only the in-memory member is replaced here; this function sets no dirty flag and writes no transaction
+ }
+}
+
void PG::clear_scrub_reserved()
{
osd->scrub_wq.dequeue(this);
@@ -3719,7 +3733,7 @@ void PG::chunky_scrub() {
start = scrubber.end;
// special case: reached end of file store, implicitly a boundary
- if (objects.size() == 0) {
+ if (objects.empty()) {
break;
}
@@ -4037,13 +4051,13 @@ void PG::_compare_scrubmaps(const map<int,ScrubMap*> &maps,
}
}
assert(auth != maps.end());
- if (cur_missing.size()) {
+ if (!cur_missing.empty()) {
missing[*k] = cur_missing;
}
- if (cur_inconsistent.size()) {
+ if (!cur_inconsistent.empty()) {
inconsistent[*k] = cur_inconsistent;
}
- if (cur_inconsistent.size() || cur_missing.size()) {
+ if (!cur_inconsistent.empty() || !cur_missing.empty()) {
authoritative[*k] = auth->first;
}
}
@@ -4078,7 +4092,7 @@ void PG::scrub_compare_maps() {
ss);
dout(2) << ss.str() << dendl;
- if (authoritative.size() || scrubber.inconsistent_snapcolls.size()) {
+ if (!authoritative.empty() || !scrubber.inconsistent_snapcolls.empty()) {
osd->clog.error(ss);
}
@@ -4102,7 +4116,7 @@ void PG::scrub_process_inconsistent() {
bool deep_scrub = state_test(PG_STATE_DEEP_SCRUB);
const char *mode = (repair ? "repair": (deep_scrub ? "deep-scrub" : "scrub"));
- if (scrubber.authoritative.size() || scrubber.inconsistent.size()) {
+ if (!scrubber.authoritative.empty() || !scrubber.inconsistent.empty()) {
stringstream ss;
for (map<hobject_t, set<int> >::iterator obj =
scrubber.inconsistent_snapcolls.begin();
@@ -4231,7 +4245,8 @@ void PG::scrub_finish() {
{
ObjectStore::Transaction *t = new ObjectStore::Transaction;
- write_info(*t);
+ dirty_info = true;
+ write_if_dirty(*t);
int tr = osd->store->queue_transaction(osr.get(), t);
assert(tr == 0);
}
@@ -4552,6 +4567,7 @@ void PG::start_peering_interval(const OSDMapRef lastmap,
if (!lastmap) {
dout(10) << " no lastmap" << dendl;
dirty_info = true;
+ dirty_big_info = true;
} else {
bool new_interval = pg_interval_t::check_new_interval(
oldacting, newacting,
@@ -4563,6 +4579,7 @@ void PG::start_peering_interval(const OSDMapRef lastmap,
if (new_interval) {
dout(10) << " noting past " << past_intervals.rbegin()->second << dendl;
dirty_info = true;
+ dirty_big_info = true;
}
}
@@ -4649,7 +4666,7 @@ void PG::start_peering_interval(const OSDMapRef lastmap,
osd->remove_want_pg_temp(info.pgid);
cancel_recovery();
- if (acting.empty() && up.size() && up[0] == osd->whoami) {
+ if (acting.empty() && !up.empty() && up[0] == osd->whoami) {
dout(10) << " acting empty, but i am up[0], clearing pg_temp" << dendl;
osd->queue_want_pg_temp(info.pgid, acting);
}
@@ -4677,6 +4694,7 @@ void PG::proc_primary_info(ObjectStore::Transaction &t, const pg_info_t &oinfo)
adjust_local_snaps();
}
dirty_info = true;
+ dirty_big_info = true;
}
}
@@ -4689,7 +4707,7 @@ ostream& operator<<(ostream& out, const PG& pg)
out << " r=" << pg.get_role();
out << " lpr=" << pg.get_last_peering_reset();
- if (pg.past_intervals.size()) {
+ if (!pg.past_intervals.empty()) {
out << " pi=" << pg.past_intervals.begin()->first << "-" << pg.past_intervals.rbegin()->second.last
<< "/" << pg.past_intervals.size();
}
@@ -5042,20 +5060,136 @@ std::ostream& operator<<(std::ostream& oss,
#undef dout_prefix
#define dout_prefix if (passedpg) _prefix(_dout, passedpg)
-void PG::read_log(ObjectStore *store, coll_t coll, hobject_t log_oid,
+bool PG::read_log(ObjectStore *store, coll_t coll, hobject_t log_oid,
const pg_info_t &info, OndiskLog &ondisklog, IndexedLog &log,
pg_missing_t &missing, ostringstream &oss, const PG *passedpg)
{
- // load bounds
- ondisklog.tail = ondisklog.head = 0;
+ dout(10) << "read_log" << dendl;
+ bool rewrite_log = false;
- bufferlist blb;
- store->collection_getattr(coll, "ondisklog", blb);
- bufferlist::iterator p = blb.begin();
- ::decode(ondisklog, p);
+ // legacy?
+ struct stat st;
+ int r = store->stat(coll_t::META_COLL, log_oid, &st);
+ assert(r == 0);
+ if (st.st_size > 0) {
+ read_log_old(store, coll, log_oid, info, ondisklog, log, missing, oss, passedpg);
+ rewrite_log = true;
+ } else {
+ log.tail = info.log_tail;
+ ObjectMap::ObjectMapIterator p = store->get_omap_iterator(coll_t::META_COLL, log_oid);
+ if (p) for (p->seek_to_first(); p->valid() ; p->next()) {
+ bufferlist bl = p->value();//Copy bufferlist before creating iterator
+ bufferlist::iterator bp = bl.begin();
+ if (p->key() == "divergent_priors") {
+ ::decode(ondisklog.divergent_priors, bp);
+ dout(20) << "read_log " << ondisklog.divergent_priors.size() << " divergent_priors" << dendl;
+ } else {
+ pg_log_entry_t e;
+ e.decode_with_checksum(bp);
+ dout(20) << "read_log " << e << dendl;
+ if (!log.log.empty()) {
+ pg_log_entry_t last_e(log.log.back());
+ assert(last_e.version.version == e.version.version - 1);
+ assert(last_e.version.epoch <= e.version.epoch);
+ }
+ log.log.push_back(e);
+ log.head = e.version;
+ }
+ }
+ }
+ log.head = info.last_update;
+ log.index();
+
+ // build missing
+ if (info.last_complete < info.last_update) {
+ dout(10) << "read_log checking for missing items over interval (" << info.last_complete
+ << "," << info.last_update << "]" << dendl;
+
+ set<hobject_t> did;
+ for (list<pg_log_entry_t>::reverse_iterator i = log.log.rbegin();
+ i != log.log.rend();
+ i++) {
+ if (i->version <= info.last_complete) break;
+ if (did.count(i->soid)) continue;
+ did.insert(i->soid);
+
+ if (i->is_delete()) continue;
+
+ bufferlist bv;
+ int r = store->getattr(coll, i->soid, OI_ATTR, bv);
+ if (r >= 0) {
+ object_info_t oi(bv);
+ if (oi.version < i->version) {
+ dout(15) << "read_log missing " << *i << " (have " << oi.version << ")" << dendl;
+ missing.add(i->soid, i->version, oi.version);
+ }
+ } else {
+ dout(15) << "read_log missing " << *i << dendl;
+ missing.add(i->soid, i->version, eversion_t());
+ }
+ }
+ for (map<eversion_t, hobject_t>::reverse_iterator i =
+ ondisklog.divergent_priors.rbegin();
+ i != ondisklog.divergent_priors.rend();
+ ++i) {
+ if (i->first <= info.last_complete) break;
+ if (did.count(i->second)) continue;
+ did.insert(i->second);
+ bufferlist bv;
+ int r = store->getattr(coll, i->second, OI_ATTR, bv);
+ if (r >= 0) {
+ object_info_t oi(bv);
+ /**
+ * 1) we see this entry in the divergent priors mapping
+ * 2) we didn't see an entry for this object in the log
+ *
+ * From 1 & 2 we know that either the object does not exist
+ * or it is at the version specified in the divergent_priors
+ * map since the object would have been deleted atomically
+ * with the addition of the divergent_priors entry, an older
+ * version would not have been recovered, and a newer version
+ * would show up in the log above.
+ */
+ assert(oi.version == i->first);
+ } else {
+ dout(15) << "read_log missing " << *i << dendl;
+ missing.add(i->second, i->first, eversion_t());
+ }
+ }
+ }
+ dout(10) << "read_log done" << dendl;
+ return rewrite_log;
+}
- dout(10) << "read_log " << ondisklog.tail << "~" << ondisklog.length() << dendl;
+void PG::read_log_old(ObjectStore *store, coll_t coll, hobject_t log_oid,
+ const pg_info_t &info, OndiskLog &ondisklog, IndexedLog &log,
+ pg_missing_t &missing, ostringstream &oss, const PG *passedpg)
+{
+ // load bounds, based on old OndiskLog encoding.
+ uint64_t ondisklog_tail = 0;
+ uint64_t ondisklog_head = 0;
+ uint64_t ondisklog_zero_to;
+ bool ondisklog_has_checksums;
+ bufferlist blb;
+ store->collection_getattr(coll, "ondisklog", blb);
+ {
+ bufferlist::iterator bl = blb.begin();
+ DECODE_START_LEGACY_COMPAT_LEN(3, 3, 3, bl);
+ ondisklog_has_checksums = (struct_v >= 2);
+ ::decode(ondisklog_tail, bl);
+ ::decode(ondisklog_head, bl);
+ if (struct_v >= 4)
+ ::decode(ondisklog_zero_to, bl);
+ else
+ ondisklog_zero_to = 0;
+ if (struct_v >= 5)
+ ::decode(ondisklog.divergent_priors, bl);
+ DECODE_FINISH(bl);
+ }
+ uint64_t ondisklog_length = ondisklog_head - ondisklog_tail;
+ dout(10) << "read_log " << ondisklog_tail << "~" << ondisklog_length << dendl;
+
log.tail = info.log_tail;
// In case of sobject_t based encoding, may need to list objects in the store
@@ -5063,15 +5197,15 @@ void PG::read_log(ObjectStore *store, coll_t coll, hobject_t log_oid,
bool listed_collection = false;
vector<hobject_t> ls;
- if (ondisklog.head > 0) {
+ if (ondisklog_head > 0) {
// read
bufferlist bl;
- store->read(coll_t::META_COLL, log_oid, ondisklog.tail, ondisklog.length(), bl);
- if (bl.length() < ondisklog.length()) {
+ store->read(coll_t::META_COLL, log_oid, ondisklog_tail, ondisklog_length, bl);
+ if (bl.length() < ondisklog_length) {
std::ostringstream oss;
oss << "read_log got " << bl.length() << " bytes, expected "
- << ondisklog.head << "-" << ondisklog.tail << "="
- << ondisklog.length();
+ << ondisklog_head << "-" << ondisklog_tail << "="
+ << ondisklog_length;
throw read_log_error(oss.str().c_str());
}
@@ -5081,8 +5215,8 @@ void PG::read_log(ObjectStore *store, coll_t coll, hobject_t log_oid,
eversion_t last;
bool reorder = false;
while (!p.end()) {
- uint64_t pos = ondisklog.tail + p.get_off();
- if (ondisklog.has_checksums) {
+ uint64_t pos = ondisklog_tail + p.get_off();
+ if (ondisklog_has_checksums) {
bufferlist ebl;
::decode(ebl, p);
__u32 crc;
@@ -5150,19 +5284,19 @@ void PG::read_log(ObjectStore *store, coll_t coll, hobject_t log_oid,
}
e.offset = pos;
- uint64_t endpos = ondisklog.tail + p.get_off();
+ uint64_t endpos = ondisklog_tail + p.get_off();
log.log.push_back(e);
last = e.version;
// [repair] at end of log?
if (!p.end() && e.version == info.last_update) {
oss << info.pgid << " log has extra data at "
- << endpos << "~" << (ondisklog.head-endpos) << " after "
+ << endpos << "~" << (ondisklog_head-endpos) << " after "
<< info.last_update << "\n";
dout(0) << "read_log " << endpos << " *** extra gunk at end of log, "
- << "adjusting ondisklog.head" << dendl;
- ondisklog.head = endpos;
+ << "adjusting ondisklog_head" << dendl;
+ ondisklog_head = endpos;
break;
}
}
@@ -5177,68 +5311,6 @@ void PG::read_log(ObjectStore *store, coll_t coll, hobject_t log_oid,
log.log.push_back(p->second);
}
}
-
- log.head = info.last_update;
- log.index();
-
- // build missing
- if (info.last_complete < info.last_update) {
- dout(10) << "read_log checking for missing items over interval (" << info.last_complete
- << "," << info.last_update << "]" << dendl;
-
- set<hobject_t> did;
- for (list<pg_log_entry_t>::reverse_iterator i = log.log.rbegin();
- i != log.log.rend();
- i++) {
- if (i->version <= info.last_complete) break;
- if (did.count(i->soid)) continue;
- did.insert(i->soid);
-
- if (i->is_delete()) continue;
-
- bufferlist bv;
- int r = store->getattr(coll, i->soid, OI_ATTR, bv);
- if (r >= 0) {
- object_info_t oi(bv);
- if (oi.version < i->version) {
- dout(15) << "read_log missing " << *i << " (have " << oi.version << ")" << dendl;
- missing.add(i->soid, i->version, oi.version);
- }
- } else {
- dout(15) << "read_log missing " << *i << dendl;
- missing.add(i->soid, i->version, eversion_t());
- }
- }
- for (map<eversion_t, hobject_t>::reverse_iterator i =
- ondisklog.divergent_priors.rbegin();
- i != ondisklog.divergent_priors.rend();
- ++i) {
- if (i->first <= info.last_complete) break;
- if (did.count(i->second)) continue;
- did.insert(i->second);
- bufferlist bv;
- int r = store->getattr(coll, i->second, OI_ATTR, bv);
- if (r >= 0) {
- object_info_t oi(bv);
- /**
- * 1) we see this entry in the divergent priors mapping
- * 2) we didn't see an entry for this object in the log
- *
- * From 1 & 2 we know that either the object does not exist
- * or it is at the version specified in the divergent_priors
- * map since the object would have been deleted atomically
- * with the addition of the divergent_priors entry, an older
- * version would not have been recovered, and a newer version
- * would show up in the log above.
- */
- assert(oi.version == i->first);
- } else {
- dout(15) << "read_log missing " << *i << dendl;
- missing.add(i->second, i->first, eversion_t());
- }
- }
- }
- dout(10) << "read_log done" << dendl;
}
/*------------ Recovery State Machine----------------*/
@@ -6042,6 +6114,7 @@ boost::statechart::result PG::RecoveryState::Active::react(const AdvMap& advmap)
pg->snap_trimq.union_of(pg->pool.newly_removed_snaps);
dout(10) << *pg << " snap_trimq now " << pg->snap_trimq << dendl;
pg->dirty_info = true;
+ pg->dirty_big_info = true;
}
pg->check_recovery_sources(pg->get_osdmap());
@@ -6359,6 +6432,7 @@ boost::statechart::result PG::RecoveryState::Stray::react(const MLogRec& logevt)
pg->info = msg->info;
pg->reg_next_scrub();
pg->dirty_info = true;
+ pg->dirty_big_info = true; // maybe.
pg->dirty_log = true;
pg->log.claim_log(msg->log);
pg->missing.clear();
@@ -6612,7 +6686,7 @@ PG::RecoveryState::GetLog::GetLog(my_context ctx) :
// adjust acting?
if (!pg->choose_acting(newest_update_osd)) {
- if (pg->want_acting.size()) {
+ if (!pg->want_acting.empty()) {
post_event(NeedActingChange());
} else {
post_event(IsIncomplete());
diff --git a/src/osd/PG.h b/src/osd/PG.h
index ba80f8186e6..ec3d4664a90 100644
--- a/src/osd/PG.h
+++ b/src/osd/PG.h
@@ -42,8 +42,6 @@
#include "messages/MOSDRepScrub.h"
#include "messages/MOSDPGLog.h"
-#include "common/DecayCounter.h"
-
#include <list>
#include <memory>
#include <string>
@@ -269,7 +267,7 @@ public:
caller_ops[e.reqid] = &(log.back());
}
- void trim(ObjectStore::Transaction &t, eversion_t s);
+ void trim(ObjectStore::Transaction &t, hobject_t& oid, eversion_t s);
ostream& print(ostream& out) const;
};
@@ -336,6 +334,16 @@ public:
f->dump_unsigned("head", head);
f->dump_unsigned("tail", tail);
f->dump_unsigned("zero_to", zero_to);
+ f->open_array_section("divergent_priors");
+ for (map<eversion_t, hobject_t>::const_iterator p = divergent_priors.begin();
+ p != divergent_priors.end();
+ ++p) {
+ f->open_object_section("prior");
+ f->dump_stream("version") << p->first;
+ f->dump_stream("object") << p->second;
+ f->close_section();
+ }
+ f->close_section();
}
static void generate_test_instances(list<OndiskLog*>& o) {
o.push_back(new OndiskLog);
@@ -379,6 +387,7 @@ public:
void unlock() {
//generic_dout(0) << this << " " << info.pgid << " unlock" << dendl;
assert(!dirty_info);
+ assert(!dirty_big_info);
assert(!dirty_log);
_lock.Unlock();
}
@@ -417,13 +426,23 @@ public:
}
- bool dirty_info, dirty_log;
+ bool dirty_info, dirty_big_info, dirty_log;
public:
// pg state
pg_info_t info;
+ __u8 info_struct_v;
const coll_t coll;
IndexedLog log;
+ /// Build the string "<pgid>_info" from the stringified pgid.
+ /// NOTE(review): presumably the key this PG's info is stored under -- confirm against write_info/read_info.
+ static string get_info_key(pg_t pgid) {
+ return stringify(pgid) + "_info";
+ }
+ /// Build the string "<pgid>_biginfo" from the stringified pgid.
+ static string get_biginfo_key(pg_t pgid) {
+ return stringify(pgid) + "_biginfo";
+ }
+ /// Build the string "<pgid>_epoch" from the stringified pgid.
+ static string get_epoch_key(pg_t pgid) {
+ return stringify(pgid) + "_epoch";
+ }
hobject_t log_oid;
hobject_t biginfo_oid;
OndiskLog ondisklog;
@@ -596,7 +615,7 @@ protected:
/// Adjusts begin to the first object
void trim() {
- if (objects.size())
+ if (!objects.empty())
begin = objects.begin()->first;
else
begin = end;
@@ -1002,6 +1021,8 @@ public:
ino_t hino, const hobject_t &hoid,
const map<string, bufferptr> &attrs,
set<snapid_t> *snapcolls) {};
+ void check_ondisk_snap_colls(
+ const interval_set<snapid_t> &ondisk_snapcolls);
void clear_scrub_reserved();
void scrub_reserve_replicas();
void scrub_unreserve_replicas();
@@ -1766,29 +1787,37 @@ public:
// pg on-disk state
void do_pending_flush();
+private:
void write_info(ObjectStore::Transaction& t);
void write_log(ObjectStore::Transaction& t);
+public:
void write_if_dirty(ObjectStore::Transaction& t);
void add_log_entry(pg_log_entry_t& e, bufferlist& log_bl);
- void append_log(vector<pg_log_entry_t>& logv, eversion_t trim_to, ObjectStore::Transaction &t);
+ void append_log(
+ vector<pg_log_entry_t>& logv, eversion_t trim_to, ObjectStore::Transaction &t);
- static void read_log(ObjectStore *store, coll_t coll, hobject_t log_oid,
+ /// return true if the log should be rewritten
+ static bool read_log(ObjectStore *store, coll_t coll, hobject_t log_oid,
+ const pg_info_t &info, OndiskLog &ondisklog, IndexedLog &log,
+ pg_missing_t &missing, ostringstream &oss, const PG *passedpg = NULL);
+ static void read_log_old(ObjectStore *store, coll_t coll, hobject_t log_oid,
const pg_info_t &info, OndiskLog &ondisklog, IndexedLog &log,
pg_missing_t &missing, ostringstream &oss, const PG *passedpg = NULL);
bool check_log_for_corruption(ObjectStore *store);
void trim(ObjectStore::Transaction& t, eversion_t v);
- void trim_ondisklog(ObjectStore::Transaction& t);
void trim_peers();
std::string get_corrupt_pg_log_name() const;
- static int read_info(ObjectStore *store, const coll_t coll,
+ static int read_info(
+ ObjectStore *store, const coll_t coll,
bufferlist &bl, pg_info_t &info, map<epoch_t,pg_interval_t> &past_intervals,
- hobject_t &biginfo_oid, interval_set<snapid_t> &snap_collections);
+ hobject_t &biginfo_oid, hobject_t &infos_oid,
+ interval_set<snapid_t> &snap_collections, __u8 &);
void read_state(ObjectStore *store, bufferlist &bl);
- static epoch_t peek_map_epoch(ObjectStore *store,
- coll_t coll, bufferlist *bl);
+ static epoch_t peek_map_epoch(ObjectStore *store, coll_t coll,
+ hobject_t &infos_oid, bufferlist *bl);
coll_t make_snap_collection(ObjectStore::Transaction& t, snapid_t sn);
void update_snap_collections(vector<pg_log_entry_t> &log_entries,
ObjectStore::Transaction& t);
diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc
index 635fde6cc65..d23db2884ed 100644
--- a/src/osd/ReplicatedPG.cc
+++ b/src/osd/ReplicatedPG.cc
@@ -330,7 +330,7 @@ int ReplicatedPG::do_command(vector<string>& cmd, ostream& ss,
mark_all_unfound_lost(mode);
return 0;
}
- else if (cmd.size() >= 1 && cmd[0] == "list_missing") {
+ else if (!cmd.empty() && cmd[0] == "list_missing") {
JSONFormatter jf(true);
hobject_t offset;
if (cmd.size() > 1) {
@@ -1246,7 +1246,8 @@ void ReplicatedPG::do_backfill(OpRequestRef op)
info.stats.stats = m->stats;
ObjectStore::Transaction *t = new ObjectStore::Transaction;
- write_info(*t);
+ dirty_info = true;
+ write_if_dirty(*t);
int tr = osd->store->queue_transaction(osr.get(), t);
assert(tr == 0);
}
@@ -5298,7 +5299,8 @@ void ReplicatedPG::submit_push_complete(ObjectRecoveryInfo &recovery_info,
recover_got(recovery_info.soid, recovery_info.version);
// update pg
- write_info(*t);
+ dirty_info = true;
+ write_if_dirty(*t);
}
ObjectRecoveryInfo ReplicatedPG::recalc_subsets(const ObjectRecoveryInfo& recovery_info)
@@ -6160,7 +6162,8 @@ void ReplicatedPG::mark_all_unfound_lost(int what)
if (missing.num_missing() == 0) {
// advance last_complete since nothing else is missing!
info.last_complete = info.last_update;
- write_info(*t);
+ dirty_info = true;
+ write_if_dirty(*t);
}
osd->store->queue_transaction(osr.get(), t, c, NULL, new C_OSD_OndiskWriteUnlockList(&c->obcs));
@@ -7266,7 +7269,7 @@ static set<snapid_t> get_expected_snap_colls(
bufferlist oiattr;
oiattr.push_back(oiiter->second);
*oi = object_info_t(oiattr);
- if (oi->snaps.size() > 0)
+ if (!oi->snaps.empty())
to_check.insert(*(oi->snaps.begin()));
if (oi->snaps.size() > 1)
to_check.insert(*(oi->snaps.rbegin()));
@@ -7397,7 +7400,8 @@ boost::statechart::result ReplicatedPG::NotTrimming::react(const SnapTrim&)
ObjectStore::Transaction *t = new ObjectStore::Transaction;
pg->snap_collections.erase(snap_to_trim);
t->remove_collection(col_to_trim);
- pg->write_info(*t);
+ pg->dirty_big_info = true;
+ pg->write_if_dirty(*t);
int r = pg->osd->store->queue_transaction(
NULL, t, new ObjectStore::C_DeleteTransaction(t));
assert(r == 0);
@@ -7453,7 +7457,8 @@ boost::statechart::result ReplicatedPG::RepColTrim::react(const SnapTrim&)
}
t->remove_collection(col_to_trim);
pg->snap_collections.erase(snap_to_trim);
- pg->write_info(*t);
+ pg->dirty_big_info = true;
+ pg->write_if_dirty(*t);
int r = pg->osd->store->queue_transaction(NULL, t, new ObjectStore::C_DeleteTransaction(t));
assert(r == 0);
return discard_event();
@@ -7563,7 +7568,8 @@ boost::statechart::result ReplicatedPG::WaitingOnReplicas::react(const SnapTrim&
ObjectStore::Transaction *t = new ObjectStore::Transaction;
dout(10) << "removing snap " << sn << " collection " << c << dendl;
pg->snap_collections.erase(sn);
- pg->write_info(*t);
+ pg->dirty_big_info = true;
+ pg->write_if_dirty(*t);
t->remove_collection(c);
int tr = pg->osd->store->queue_transaction(pg->osr.get(), t);
assert(tr == 0);
diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc
index 786d0e876b4..8ef0c9b58a1 100644
--- a/src/osd/osd_types.cc
+++ b/src/osd/osd_types.cc
@@ -294,10 +294,26 @@ bool coll_t::is_pg(pg_t& pgid, snapid_t& snap) const
const char *snap_start = strchr(cstr, '_');
if (!snap_start)
return false;
- if (strncmp(snap_start, "_head", 5) == 0)
+ if (strncmp(snap_start, "_head", 5) == 0) {
snap = CEPH_NOSNAP;
- else
+ } else {
+ errno = 0;
snap = strtoull(snap_start+1, 0, 16);
+ if (errno)
+ return false;
+ }
+ return true;
+}
+
+bool coll_t::is_pg_prefix(pg_t& pgid) const
+{
+ // Returns true when the collection name is accepted by pgid.parse()
+ // and contains a '_' somewhere. Whatever follows the '_' is not
+ // interpreted here.
+ // NOTE(review): pgid.parse() is presumably what fills in pgid -- confirm.
+ const char *cstr(str.c_str());
+
+ if (!pgid.parse(cstr))
+ return false;
+ // Only the presence of the separator is checked; its position is unused.
+ const char *snap_start = strchr(cstr, '_');
+ if (!snap_start)
+ return false;
return true;
}
@@ -1694,6 +1710,34 @@ void pg_query_t::generate_test_instances(list<pg_query_t*>& o)
// -- pg_log_entry_t --
+string pg_log_entry_t::get_key_name() const
+{
+ // Key is "<epoch>.<version>", zero-padded to fixed widths (10 and 20
+ // digits), so every key has the same length and plain string comparison
+ // orders keys by epoch first, then version.
+ // Worst case is 10 + 1 + 20 characters plus the NUL = 32 bytes, which
+ // fits in key[40]; snprintf bounds the write regardless.
+ char key[40];
+ // The version is printed at 64-bit width (20 digits). "%lu" only matches
+ // such a value where unsigned long is 64 bits wide, so cast explicitly
+ // and use "%llu" to keep the format and argument types in agreement on
+ // every platform.
+ snprintf(key, sizeof(key), "%010u.%020llu",
+ (unsigned)version.epoch, (unsigned long long)version.version);
+ return string(key);
+}
+
+void pg_log_entry_t::encode_with_checksum(bufferlist& bl) const
+{
+ // Encode this entry into a scratch buffer first so the checksum is
+ // computed over exactly the bytes produced by encode().
+ bufferlist payload(sizeof(*this)*2);
+ encode(payload);
+ __u32 checksum = payload.crc32c(0);
+ // Emit the encoded entry, then its crc32c, into the caller's buffer.
+ ::encode(payload, bl);
+ ::encode(checksum, bl);
+}
+
+void pg_log_entry_t::decode_with_checksum(bufferlist::iterator& p)
+{
+ // Read the encoded entry and the checksum stored after it.
+ bufferlist payload;
+ ::decode(payload, p);
+ __u32 stored_crc;
+ ::decode(stored_crc, p);
+ // Reject the entry before decoding it if the crc32c of the payload
+ // does not match the stored value.
+ bool intact = (payload.crc32c(0) == stored_crc);
+ if (!intact)
+ throw buffer::malformed_input("bad checksum on pg_log_entry_t");
+ bufferlist::iterator cursor = payload.begin();
+ decode(cursor);
+}
+
void pg_log_entry_t::encode(bufferlist &bl) const
{
ENCODE_START(7, 4, bl);
diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h
index e0680574057..558c10ff27b 100644
--- a/src/osd/osd_types.h
+++ b/src/osd/osd_types.h
@@ -38,6 +38,8 @@
#define CEPH_OSD_FEATURE_INCOMPAT_CATEGORIES CompatSet::Feature(5, "categories")
#define CEPH_OSD_FEATURE_INCOMPAT_HOBJECTPOOL CompatSet::Feature(6, "hobjectpool")
#define CEPH_OSD_FEATURE_INCOMPAT_BIGINFO CompatSet::Feature(7, "biginfo")
+#define CEPH_OSD_FEATURE_INCOMPAT_LEVELDBINFO CompatSet::Feature(8, "leveldbinfo")
+#define CEPH_OSD_FEATURE_INCOMPAT_LEVELDBLOG CompatSet::Feature(9, "leveldblog")
typedef hobject_t collection_list_handle_t;
@@ -355,6 +357,7 @@ public:
return str < rhs.str;
}
+ bool is_pg_prefix(pg_t& pgid) const;
bool is_pg(pg_t& pgid, snapid_t& snap) const;
bool is_temp(pg_t& pgid) const;
bool is_removal(uint64_t *seq, pg_t *pgid) const;
@@ -1316,6 +1319,10 @@ struct pg_log_entry_t {
return reqid != osd_reqid_t() && (op == MODIFY || op == DELETE);
}
+ string get_key_name() const;
+ void encode_with_checksum(bufferlist& bl) const;
+ void decode_with_checksum(bufferlist::iterator& p);
+
void encode(bufferlist &bl) const;
void decode(bufferlist::iterator &bl);
void dump(Formatter *f) const;
diff --git a/src/osdc/ObjectCacher.cc b/src/osdc/ObjectCacher.cc
index 265a806dbb2..5ba29a1c794 100644
--- a/src/osdc/ObjectCacher.cc
+++ b/src/osdc/ObjectCacher.cc
@@ -514,7 +514,7 @@ ObjectCacher::~ObjectCacher()
for (vector<hash_map<sobject_t, Object *> >::iterator i = objects.begin();
i != objects.end();
++i)
- assert(!i->size());
+ assert(i->empty());
assert(bh_lru_rest.lru_get_size() == 0);
assert(bh_lru_dirty.lru_get_size() == 0);
assert(ob_lru.lru_get_size() == 0);
diff --git a/src/osdc/Objecter.cc b/src/osdc/Objecter.cc
index 339499fd96a..21d9df7f3d6 100644
--- a/src/osdc/Objecter.cc
+++ b/src/osdc/Objecter.cc
@@ -986,10 +986,9 @@ tid_t Objecter::_op_submit(Op *op)
assert(client_inc >= 0);
// pick target
- bool check_for_latest_map = false;
num_homeless_ops++; // initially; recalc_op_target() will decrement if it finds a target
int r = recalc_op_target(op);
- check_for_latest_map = (r == RECALC_OP_TARGET_POOL_DNE);
+ bool check_for_latest_map = (r == RECALC_OP_TARGET_POOL_DNE);
// add to gather set(s)
if (op->onack) {
@@ -1124,7 +1123,7 @@ int Objecter::recalc_op_target(Op *op)
OSDSession *s = NULL;
op->used_replica = false;
- if (acting.size()) {
+ if (!acting.empty()) {
int osd;
bool read = (op->flags & CEPH_OSD_FLAG_READ) && (op->flags & CEPH_OSD_FLAG_WRITE) == 0;
if (read && (op->flags & CEPH_OSD_FLAG_BALANCE_READS)) {
diff --git a/src/osdmaptool.cc b/src/osdmaptool.cc
index 6df7eb038c3..66feeb94d81 100644
--- a/src/osdmaptool.cc
+++ b/src/osdmaptool.cc
@@ -120,7 +120,7 @@ int main(int argc, const char **argv)
++i;
}
}
- if (args.size() < 1) {
+ if (args.empty()) {
cerr << me << ": must specify osdmap filename" << std::endl;
usage();
}
diff --git a/src/psim.cc b/src/psim.cc
index b089876e090..89d261a27c8 100644
--- a/src/psim.cc
+++ b/src/psim.cc
@@ -52,7 +52,7 @@ int main(int argc, char **argv)
int x = H(oid);
x = ceph_stable_mod(x, 1023, 1023);
int s = crush_hash32(x) % 15;
- //cout << "psim: x = " << x << " s = " << s << std::endl;
+ //cout << "ceph_psim: x = " << x << " s = " << s << std::endl;
//osds[0] = s;
}
#endif
diff --git a/src/rados.cc b/src/rados.cc
index a850f874ac2..d3de74a810b 100644
--- a/src/rados.cc
+++ b/src/rados.cc
@@ -249,7 +249,7 @@ static int do_copy(IoCtx& io_ctx, const char *objname, IoCtx& target_ctx, const
for (iter = attrset.begin(); iter != attrset.end(); ++iter) {
write_op.setxattr(iter->first.c_str(), iter->second);
}
- if (omap.size()) {
+ if (!omap.empty()) {
write_op.omap_set(omap);
}
ret = target_ctx.operate(target_oid, &write_op);
@@ -283,7 +283,7 @@ static int do_copy(IoCtx& io_ctx, const char *objname, IoCtx& target_ctx, const
if (ret < 0)
goto err;
- if (!omap.size())
+ if (omap.empty())
break;
ret = target_ctx.omap_set(target_oid, omap);
diff --git a/src/rbd.cc b/src/rbd.cc
index dd56bc9309e..02a793bf64b 100644
--- a/src/rbd.cc
+++ b/src/rbd.cc
@@ -301,7 +301,7 @@ static int do_list(librbd::RBD &rbd, librados::IoCtx& io_ctx, bool lflag,
if (r < 0)
return r;
string lockstr;
- if (lockers.size()) {
+ if (!lockers.empty()) {
lockstr = (exclusive) ? "excl" : "shr";
}
@@ -317,7 +317,7 @@ static int do_list(librbd::RBD &rbd, librados::IoCtx& io_ctx, bool lflag,
f->close_section();
}
f->dump_int("format", old_format ? 1 : 2);
- if (lockers.size())
+ if (!lockers.empty())
f->dump_string("lock_type", exclusive ? "exclusive" : "shared");
f->close_section();
} else {
@@ -375,7 +375,7 @@ static int do_list(librbd::RBD &rbd, librados::IoCtx& io_ctx, bool lflag,
if (f) {
f->close_section();
f->flush(cout);
- } else if (names.size()) {
+ } else if (!names.empty()) {
cout << tbl;
}
@@ -1664,6 +1664,18 @@ static int do_kernel_rm(const char *dev)
if (r < 0)
return r;
+ // let udevadm do its job *before* we try to unmap
+ if (udevadm_settle) {
+ r = system("/sbin/udevadm settle");
+ if (r) {
+ if (r < 0)
+ cerr << "rbd: error executing udevadm as shell command!" << std::endl;
+ else
+ cerr << "rbd: '/sbin/udevadm settle' failed! (" << r << ")" <<std::endl;
+ // ignore the error, though.
+ }
+ }
+
int fd = open("/sys/bus/rbd/remove", O_WRONLY);
if (fd < 0) {
return -errno;
diff --git a/src/rbd_fuse/rbd-fuse.c b/src/rbd_fuse/rbd-fuse.c
index 0b28f63c3ad..5bdaba3a0d9 100644
--- a/src/rbd_fuse/rbd-fuse.c
+++ b/src/rbd_fuse/rbd-fuse.c
@@ -138,7 +138,7 @@ open_rbd_image(const char *image_name)
return -1;
// relies on caller to keep rbd_images up to date
- for (im = rbd_images; im != NULL; i++, im = im->next) {
+ for (im = rbd_images; im != NULL; im = im->next) {
if (strcmp(im->image_name, image_name) == 0) {
break;
}
diff --git a/src/rgw/logrotate.conf b/src/rgw/logrotate.conf
new file mode 100644
index 00000000000..7fb3391bbec
--- /dev/null
+++ b/src/rgw/logrotate.conf
@@ -0,0 +1,24 @@
+/var/log/radosgw/*.log {
+ rotate 7
+ daily
+ compress
+ sharedscripts
+ postrotate
+ if which invoke-rc.d > /dev/null 2>&1 && [ -x `which invoke-rc.d` ]; then
+ invoke-rc.d radosgw reload >/dev/null
+ elif which service > /dev/null 2>&1 && [ -x `which service` ]; then
+ service radosgw reload >/dev/null
+ fi
+ # Possibly reload twice, but depending on ceph.conf the reload above may be a no-op
+ if which initctl > /dev/null 2>&1 && [ -x `which initctl` ]; then
+ # upstart reload isn't very helpful here:
+ # https://bugs.launchpad.net/upstart/+bug/1012938
+ initctl list \
+ | sed -n 's/^\(radosgw\+\)[ \t]\+(\([^ \/]\+\)\/\([^ \/]\+\))[ \t]\+start\/.*$/\1 cluster=\2 id=\3/p' \
+ | while read l; do
+ initctl reload -- $l 2>/dev/null || :
+ done
+ fi
+ endscript
+ missingok
+}
diff --git a/src/rgw/rgw_acl_s3.cc b/src/rgw/rgw_acl_s3.cc
index e1c81e73ac8..8ae57307d7c 100644
--- a/src/rgw/rgw_acl_s3.cc
+++ b/src/rgw/rgw_acl_s3.cc
@@ -264,20 +264,25 @@ bool RGWAccessControlList_S3::xml_end(const char *el) {
return true;
}
-bool RGWAccessControlList_S3::create_canned(string id, string name, string canned_acl)
+bool RGWAccessControlList_S3::create_canned(ACLOwner& owner, ACLOwner& bucket_owner, const string& canned_acl)
{
acl_user_map.clear();
grant_map.clear();
+ ACLGrant owner_grant;
+
+ string bid = bucket_owner.get_id();
+ string bname = bucket_owner.get_display_name();
+
/* owner gets full control */
- ACLGrant grant;
- grant.set_canon(id, name, RGW_PERM_FULL_CONTROL);
- add_grant(&grant);
+ owner_grant.set_canon(owner.get_id(), owner.get_display_name(), RGW_PERM_FULL_CONTROL);
+ add_grant(&owner_grant);
if (canned_acl.size() == 0 || canned_acl.compare("private") == 0) {
return true;
}
+ ACLGrant bucket_owner_grant;
ACLGrant group_grant;
if (canned_acl.compare("public-read") == 0) {
group_grant.set_group(ACL_GROUP_ALL_USERS, RGW_PERM_READ);
@@ -290,6 +295,14 @@ bool RGWAccessControlList_S3::create_canned(string id, string name, string canne
} else if (canned_acl.compare("authenticated-read") == 0) {
group_grant.set_group(ACL_GROUP_AUTHENTICATED_USERS, RGW_PERM_READ);
add_grant(&group_grant);
+ } else if (canned_acl.compare("bucket-owner-read") == 0) {
+ bucket_owner_grant.set_canon(bid, bname, RGW_PERM_READ);
+ if (bid.compare(owner.get_id()) != 0)
+ add_grant(&bucket_owner_grant);
+ } else if (canned_acl.compare("bucket-owner-full-control") == 0) {
+ bucket_owner_grant.set_canon(bid, bname, RGW_PERM_FULL_CONTROL);
+ if (bid.compare(owner.get_id()) != 0)
+ add_grant(&bucket_owner_grant);
} else {
return false;
}
diff --git a/src/rgw/rgw_acl_s3.h b/src/rgw/rgw_acl_s3.h
index 1e2ffe43242..453f68161f0 100644
--- a/src/rgw/rgw_acl_s3.h
+++ b/src/rgw/rgw_acl_s3.h
@@ -66,7 +66,7 @@ public:
out << "</AccessControlList>";
}
- bool create_canned(string id, string name, string canned_acl);
+ bool create_canned(ACLOwner& owner, ACLOwner& bucket_owner, const string& canned_acl);
};
class ACLOwner_S3 : public ACLOwner, public XMLObj
@@ -104,11 +104,11 @@ public:
}
int rebuild(RGWRados *store, ACLOwner *owner, RGWAccessControlPolicy& dest);
bool compare_group_name(string& id, ACLGroupTypeEnum group);
- virtual bool create_canned(string id, string name, string canned_acl) {
+
+ virtual bool create_canned(ACLOwner& _owner, ACLOwner& bucket_owner, string canned_acl) {
RGWAccessControlList_S3& _acl = static_cast<RGWAccessControlList_S3 &>(acl);
- bool ret = _acl.create_canned(id, name, canned_acl);
- owner.set_id(id);
- owner.set_name(name);
+ bool ret = _acl.create_canned(_owner, bucket_owner, canned_acl);
+ owner = _owner;
return ret;
}
};
diff --git a/src/rgw/rgw_admin.cc b/src/rgw/rgw_admin.cc
index bcb52c4420b..2095238874a 100644
--- a/src/rgw/rgw_admin.cc
+++ b/src/rgw/rgw_admin.cc
@@ -707,18 +707,16 @@ static void check_bad_user_bucket_mapping(RGWRados *store, const string& user_id
static int remove_object(RGWRados *store, rgw_bucket& bucket, std::string& object)
{
- int ret = -EINVAL;
RGWRadosCtx *rctx = new RGWRadosCtx(store);
rgw_obj obj(bucket,object);
- ret = store->delete_obj(rctx, obj);
+ int ret = store->delete_obj(rctx, obj);
return ret;
}
static int remove_bucket(RGWRados *store, rgw_bucket& bucket, bool delete_children)
{
- int ret;
map<RGWObjCategory, RGWBucketStats> stats;
std::vector<RGWObjEnt> objs;
std::string prefix, delim, marker, ns;
@@ -727,7 +725,8 @@ static int remove_bucket(RGWRados *store, rgw_bucket& bucket, bool delete_childr
RGWBucketInfo info;
bufferlist bl;
- ret = store->get_bucket_stats(bucket, stats);
+ int ret = store->get_bucket_stats(bucket, stats);
+
if (ret < 0)
return ret;
@@ -750,7 +749,7 @@ static int remove_bucket(RGWRados *store, rgw_bucket& bucket, bool delete_childr
if (ret < 0)
return ret;
- while (objs.size() > 0) {
+ while (!objs.empty()) {
std::vector<RGWObjEnt>::iterator it = objs.begin();
for (it = objs.begin(); it != objs.end(); it++) {
ret = remove_object(store, bucket, (*it).name);
@@ -945,7 +944,7 @@ int main(int argc, char **argv)
}
}
- if (args.size() == 0) {
+ if (args.empty()) {
return usage();
}
else {
@@ -1558,7 +1557,7 @@ next:
if (rgw_read_user_buckets(store, user_id, buckets, false) >= 0) {
map<string, RGWBucketEnt>& m = buckets.get_buckets();
- if (m.size() > 0 && purge_data) {
+ if (!m.empty() && purge_data) {
for (std::map<string, RGWBucketEnt>::iterator it = m.begin(); it != m.end(); it++) {
ret = remove_bucket(store, ((*it).second).bucket, true);
@@ -1567,7 +1566,7 @@ next:
}
}
- if (m.size() > 0 && !purge_data) {
+ if (!m.empty() && !purge_data) {
cerr << "ERROR: specify --purge-data to remove a user with a non-empty bucket list" << std::endl;
return 1;
}
diff --git a/src/rgw/rgw_common.h b/src/rgw/rgw_common.h
index 4b808fcbe74..cd1ebaa71f6 100644
--- a/src/rgw/rgw_common.h
+++ b/src/rgw/rgw_common.h
@@ -27,6 +27,7 @@
#include <map>
#include "include/types.h"
#include "include/utime.h"
+#include "rgw_acl.h"
using namespace std;
@@ -597,7 +598,8 @@ struct req_state {
rgw_bucket bucket;
string bucket_name_str;
string object_str;
- string bucket_owner;
+ ACLOwner bucket_owner;
+ ACLOwner owner;
map<string, string> x_meta_map;
bool has_bad_meta;
diff --git a/src/rgw/rgw_gc.cc b/src/rgw/rgw_gc.cc
index d7861e61250..11d7f0e38de 100644
--- a/src/rgw/rgw_gc.cc
+++ b/src/rgw/rgw_gc.cc
@@ -217,7 +217,7 @@ int RGWGC::process(int index, int max_secs)
} while (truncated);
done:
- if (remove_tags.size())
+ if (!remove_tags.empty())
remove(index, remove_tags);
l.unlock(&store->gc_pool_ctx, obj_names[index]);
delete ctx;
diff --git a/src/rgw/rgw_log.cc b/src/rgw/rgw_log.cc
index e999f623a01..b79cf30bbe3 100644
--- a/src/rgw/rgw_log.cc
+++ b/src/rgw/rgw_log.cc
@@ -172,7 +172,7 @@ static void log_usage(struct req_state *s, const string& op_name)
string user;
if (s->bucket_name)
- user = s->bucket_owner;
+ user = s->bucket_owner.get_id();
else
user = s->user.user_id;
@@ -304,7 +304,8 @@ int rgw_log_op(RGWRados *store, struct req_state *s, const string& op_name, OpsL
entry.user = s->user.user_id;
if (s->object_acl)
entry.object_owner = s->object_acl->get_owner().get_id();
- entry.bucket_owner = s->bucket_owner;
+ entry.bucket_owner = s->bucket_owner.get_id();
+
uint64_t bytes_sent = s->cio->get_bytes_sent();
uint64_t bytes_received = s->cio->get_bytes_received();
@@ -337,7 +338,7 @@ int rgw_log_op(RGWRados *store, struct req_state *s, const string& op_name, OpsL
if (s->cct->_conf->rgw_ops_log_rados) {
string oid = render_log_object_name(s->cct->_conf->rgw_log_object_name, &bdt,
- s->bucket.bucket_id, entry.bucket.c_str());
+ s->bucket.bucket_id, entry.bucket);
rgw_obj obj(store->params.log_pool, oid);
diff --git a/src/rgw/rgw_main.cc b/src/rgw/rgw_main.cc
index 3165ab0454a..3d4459cb576 100644
--- a/src/rgw/rgw_main.cc
+++ b/src/rgw/rgw_main.cc
@@ -162,7 +162,7 @@ class RGWProcess {
}
void _dump_queue() {
deque<RGWRequest *>::iterator iter;
- if (process->m_req_queue.size() == 0) {
+ if (process->m_req_queue.empty()) {
dout(20) << "RGWWQ: empty" << dendl;
return;
}
@@ -223,6 +223,13 @@ void RGWProcess::run()
if (chmod(path, 0777) < 0) {
dout(0) << "WARNING: couldn't set permissions on unix domain socket" << dendl;
}
+ } else if (!g_conf->rgw_port.empty()) {
+ string bind = g_conf->rgw_host + ":" + g_conf->rgw_port;
+ sock_fd = FCGX_OpenSocket(bind.c_str(), SOCKET_BACKLOG);
+ if (sock_fd < 0) {
+ dout(0) << "ERROR: FCGX_OpenSocket (" << bind.c_str() << ") returned " << sock_fd << dendl;
+ return;
+ }
}
m_tp.start();
@@ -389,6 +396,7 @@ int main(int argc, const char **argv)
vector<const char *> def_args;
def_args.push_back("--debug-rgw=20");
def_args.push_back("--keyring=$rgw_data/keyring");
+ def_args.push_back("--log-file=/var/log/radosgw/$cluster-$name");
vector<const char*> args;
argv_to_vec(argc, argv, args);
@@ -397,8 +405,8 @@ int main(int argc, const char **argv)
CINIT_FLAG_UNPRIVILEGED_DAEMON_DEFAULTS);
if (g_conf->daemonize) {
- if (g_conf->rgw_socket_path.empty()) {
- cerr << "radosgw: must specify 'rgw socket path' to run as a daemon" << std::endl;
+ if (g_conf->rgw_socket_path.empty() and g_conf->rgw_port.empty()) {
+ cerr << "radosgw: must specify 'rgw socket path' or 'rgw port' to run as a daemon" << std::endl;
exit(1);
}
diff --git a/src/rgw/rgw_op.cc b/src/rgw/rgw_op.cc
index eb22223a442..15349ebf0f9 100644
--- a/src/rgw/rgw_op.cc
+++ b/src/rgw/rgw_op.cc
@@ -287,6 +287,7 @@ int rgw_build_policies(RGWRados *store, struct req_state *s, bool only_bucket, b
{
int ret = 0;
string obj_str;
+ RGWUserInfo bucket_owner_info;
s->bucket_acl = new RGWAccessControlPolicy(s->cct);
@@ -298,11 +299,12 @@ int rgw_build_policies(RGWRados *store, struct req_state *s, bool only_bucket, b
return ret;
}
s->bucket = bucket_info.bucket;
- s->bucket_owner = bucket_info.owner;
string no_obj;
RGWAccessControlPolicy bucket_acl(s->cct);
ret = read_policy(store, s, bucket_info, s->bucket_acl, s->bucket, no_obj);
+
+ s->bucket_owner = s->bucket_acl->get_owner();
}
/* we're passed only_bucket = true when we specifically need the bucket's
@@ -384,13 +386,13 @@ int RGWGetObj::read_user_manifest_part(rgw_bucket& bucket, RGWObjEnt& ent, RGWAc
if (ret < 0)
goto done_err;
- len = bl.length();
+ off_t len = bl.length();
cur_ofs += len;
ofs += len;
ret = 0;
perfcounter->tinc(l_rgw_get_lat,
(ceph_clock_now(s->cct) - start_time));
- send_response_data(bl);
+ send_response_data(bl, 0, len);
start_time = ceph_clock_now(s->cct);
}
@@ -524,14 +526,43 @@ int RGWGetObj::handle_user_manifest(const char *prefix)
return 0;
}
+/// RGWGetDataCB adapter: forwards each handle_data() call to the
+/// get_data_cb() method of the RGWGetObj it was constructed with.
+class RGWGetObj_CB : public RGWGetDataCB
+{
+private:
+ RGWGetObj *target;
+
+public:
+ RGWGetObj_CB(RGWGetObj *get_op) : target(get_op) {}
+ virtual ~RGWGetObj_CB() {}
+
+ int handle_data(bufferlist& bl, off_t bl_ofs, off_t bl_len) {
+ return target->get_data_cb(bl, bl_ofs, bl_len);
+ }
+};
+
+int RGWGetObj::get_data_cb(bufferlist& bl, off_t bl_ofs, off_t bl_len)
+{
+ // Once the current time passes gc_invalidate_time, ask the store to
+ // defer gc for obj and move the deadline to now plus half of
+ // rgw_gc_obj_min_wait. A failed deferral is only logged.
+ utime_t now = ceph_clock_now(s->cct);
+ if (now > gc_invalidate_time) {
+ if (store->defer_gc(s->obj_ctx, obj) < 0) {
+ dout(0) << "WARNING: could not defer gc entry for obj" << dendl;
+ }
+ gc_invalidate_time = now;
+ gc_invalidate_time += (s->cct->_conf->rgw_gc_obj_min_wait / 2);
+ }
+ // Hand the data range on to the response path and return its result.
+ return send_response_data(bl, bl_ofs, bl_len);
+}
+
void RGWGetObj::execute()
{
void *handle = NULL;
utime_t start_time = s->time;
bufferlist bl;
- utime_t gc_invalidate_time = ceph_clock_now(s->cct);
+ gc_invalidate_time = ceph_clock_now(s->cct);
gc_invalidate_time += (s->cct->_conf->rgw_gc_obj_min_wait / 2);
+ RGWGetObj_CB cb(this);
+
map<string, bufferlist>::iterator attr_iter;
perfcounter->inc(l_rgw_get);
@@ -539,11 +570,11 @@ void RGWGetObj::execute()
ret = get_params();
if (ret < 0)
- goto done;
+ goto done_err;
ret = init_common();
if (ret < 0)
- goto done;
+ goto done_err;
new_ofs = ofs;
new_end = end;
@@ -551,7 +582,7 @@ void RGWGetObj::execute()
ret = store->prepare_get_obj(s->obj_ctx, obj, &new_ofs, &new_end, &attrs, mod_ptr,
unmod_ptr, &lastmod, if_match, if_nomatch, &total_len, &s->obj_size, &handle, &s->err);
if (ret < 0)
- goto done;
+ goto done_err;
attr_iter = attrs.find(RGW_ATTR_USER_MANIFEST);
if (attr_iter != attrs.end()) {
@@ -568,53 +599,22 @@ void RGWGetObj::execute()
start = ofs;
if (!get_data || ofs > end)
- goto done;
+ goto done_err;
perfcounter->inc(l_rgw_get_b, end - ofs);
- while (ofs <= end) {
- ret = store->get_obj(s->obj_ctx, &handle, obj, bl, ofs, end);
- if (ret < 0) {
- goto done;
- }
- len = ret;
-
- if (!len) {
- dout(0) << "WARNING: failed to read object, returned zero length" << dendl;
- ret = -EIO;
- goto done;
- }
-
- ofs += len;
- ret = 0;
-
- perfcounter->tinc(l_rgw_get_lat,
- (ceph_clock_now(s->cct) - start_time));
- ret = send_response_data(bl);
- bl.clear();
- if (ret < 0) {
- dout(0) << "NOTICE: failed to send response to client" << dendl;
- goto done;
- }
-
- start_time = ceph_clock_now(s->cct);
+ ret = store->get_obj_iterate(s->obj_ctx, &handle, obj, ofs, end, &cb);
- if (ofs <= end) {
- if (start_time > gc_invalidate_time) {
- int r = store->defer_gc(s->obj_ctx, obj);
- if (r < 0) {
- dout(0) << "WARNING: could not defer gc entry for obj" << dendl;
- }
- gc_invalidate_time = start_time;
- gc_invalidate_time += (s->cct->_conf->rgw_gc_obj_min_wait / 2);
- }
- }
+ perfcounter->tinc(l_rgw_get_lat,
+ (ceph_clock_now(s->cct) - start_time));
+ if (ret < 0) {
+ goto done_err;
}
- return;
+ store->finish_get_obj(&handle);
-done:
- send_response_data(bl);
+done_err:
+ send_response_data(bl, 0, 0);
store->finish_get_obj(&handle);
}
@@ -773,7 +773,7 @@ void RGWListBucket::execute()
int RGWGetBucketLogging::verify_permission()
{
- if (s->user.user_id.compare(s->bucket_owner) != 0)
+ if (s->user.user_id.compare(s->bucket_owner.get_id()) != 0)
return -EACCES;
return 0;
@@ -811,7 +811,9 @@ void RGWCreateBucket::execute()
if (ret < 0)
return;
- s->bucket_owner = s->user.user_id;
+ s->bucket_owner.set_id(s->user.user_id);
+ s->bucket_owner.set_name(s->user.display_name);
+
r = get_policy_from_attr(s->cct, store, s->obj_ctx, &old_policy, obj);
if (r >= 0) {
if (old_policy.get_owner().get_id().compare(s->user.user_id) != 0) {
@@ -1025,7 +1027,7 @@ int RGWPutObjProcessor_Aio::wait_pending_front()
bool RGWPutObjProcessor_Aio::pending_has_completed()
{
- if (pending.size() == 0)
+ if (pending.empty())
return false;
struct put_obj_aio_info& info = pending.front();
@@ -2193,7 +2195,7 @@ void RGWListBucketMultiparts::execute()
marker_meta = marker.get_meta();
ret = store->list_objects(s->bucket, max_uploads, prefix, delimiter, marker_meta, objs, common_prefixes,
!!(s->prot_flags & RGW_REST_SWIFT), mp_ns, &is_truncated, &mp_filter);
- if (objs.size()) {
+ if (!objs.empty()) {
vector<RGWObjEnt>::iterator iter;
RGWMultipartUploadEntry entry;
for (iter = objs.begin(); iter != objs.end(); ++iter) {
@@ -2253,7 +2255,7 @@ void RGWDeleteMultiObj::execute()
quiet = true;
begin_response();
- if (multi_delete->objects.size() == 0) {
+ if (multi_delete->objects.empty()) {
goto done;
}
diff --git a/src/rgw/rgw_op.h b/src/rgw/rgw_op.h
index b3a78846cda..08c10970e90 100644
--- a/src/rgw/rgw_op.h
+++ b/src/rgw/rgw_op.h
@@ -62,7 +62,6 @@ protected:
const char *if_match;
const char *if_nomatch;
off_t ofs;
- uint64_t len;
uint64_t total_len;
off_t start;
off_t end;
@@ -76,6 +75,7 @@ protected:
bool get_data;
bool partial_content;
rgw_obj obj;
+ utime_t gc_invalidate_time;
int init_common();
public:
@@ -87,7 +87,6 @@ public:
if_nomatch = NULL;
start = 0;
ofs = 0;
- len = 0;
total_len = 0;
end = -1;
mod_time = 0;
@@ -112,8 +111,10 @@ public:
uint64_t *ptotal_len, bool read_data);
int handle_user_manifest(const char *prefix);
+ int get_data_cb(bufferlist& bl, off_t ofs, off_t len);
+
virtual int get_params() = 0;
- virtual int send_response_data(bufferlist& bl) = 0;
+ virtual int send_response_data(bufferlist& bl, off_t ofs, off_t len) = 0;
virtual const char *name() { return "get_obj"; }
};
diff --git a/src/rgw/rgw_rados.cc b/src/rgw/rgw_rados.cc
index 80f0cd8c4e0..ad81259a95d 100644
--- a/src/rgw/rgw_rados.cc
+++ b/src/rgw/rgw_rados.cc
@@ -4,6 +4,7 @@
#include "common/errno.h"
#include "common/Formatter.h"
+#include "common/Throttle.h"
#include "rgw_rados.h"
#include "rgw_cache.h"
@@ -848,14 +849,14 @@ int RGWRados::select_bucket_placement(string& bucket_name, rgw_bucket& bucket)
}
read_omap:
- if (!m.size()) {
+ if (m.empty()) {
bufferlist header;
ret = omap_get_all(obj, header, m);
write_map = true;
}
- if (ret < 0 || !m.size()) {
+ if (ret < 0 || m.empty()) {
vector<string> names;
names.push_back(default_storage_pool);
vector<int> retcodes;
@@ -2341,8 +2342,7 @@ int RGWRados::prepare_get_obj(void *ctx, rgw_obj& obj,
done_err:
delete new_ctx;
- delete state;
- *handle = NULL;
+ finish_get_obj(handle);
return r;
}
@@ -2654,8 +2654,7 @@ done:
r = bl.length();
}
if (r < 0 || !len || ((off_t)(ofs + len - 1) == end)) {
- delete state;
- *handle = NULL;
+ finish_get_obj(handle);
}
done_ret:
@@ -2664,6 +2663,332 @@ done_ret:
return r;
}
+struct get_obj_data;
+ /* Argument handed to the AIO completion callback for one read: the owning operation plus the offset/length that add_io() was called with. */
+struct get_obj_aio_data {
+ struct get_obj_data *op_data; /* operation this read belongs to (add_io() stores `this` here) */
+ off_t ofs; /* key of this read in get_obj_data::io_map / completion_map */
+ off_t len; /* number of bytes requested; the completion callback returns this amount to the throttle */
+};
+
+struct get_obj_io { /* value type of get_obj_data::io_map: the destination buffer of one read */
+ off_t len; /* NOTE(review): not assigned anywhere in the visible code -- confirm whether it is still needed */
+ bufferlist bl; /* add_io() hands out a pointer to this buffer as the target of the read */
+};
+
+static void _get_obj_aio_completion_cb(completion_t cb, void *arg);
+
+struct get_obj_data : public RefCountedObject { /* shared state of one get_obj_iterate() call; add_io() takes a reference per read and the completion callback drops it */
+ CephContext *cct;
+ RGWRados *rados;
+ void *ctx;
+ IoCtx io_ctx;
+ map<off_t, get_obj_io> io_map; /* read buffers, keyed by the offset passed to add_io() */
+ map<off_t, librados::AioCompletion *> completion_map; /* completions, keyed by the same offsets as io_map */
+ uint64_t total_read;
+ Mutex lock; /* taken around io_map / completion_map / total_read accesses */
+ Mutex data_lock; /* held by the code in this file while it calls client_cb->handle_data() */
+ list<get_obj_aio_data> aio_data; /* backing storage for the callback arguments created in add_io() */
+ RGWGetDataCB *client_cb;
+ atomic_t cancelled;
+ atomic_t err_code;
+ Throttle throttle; /* constructed with rgw_get_obj_window_size as its limit */
+
+ get_obj_data(CephContext *_cct) : cct(_cct),
+ total_read(0), lock("get_obj_data"), data_lock("get_obj_data::data_lock"),
+ throttle(cct, "get_obj_data", cct->_conf->rgw_get_obj_window_size, false) {}
+ virtual ~get_obj_data() { }
+ void set_cancelled(int r) { /* flag the operation as cancelled and remember r as its error code */
+ cancelled.set(1);
+ err_code.set(r);
+ }
+
+ bool is_cancelled() {
+ return cancelled.read() == 1;
+ }
+
+ int get_err_code() {
+ return err_code.read();
+ }
+
+ int wait_next_io(bool *done) { /* wait for the first entry of completion_map; returns its return value (0 if the map was empty) and sets *done once the map is empty */
+ lock.Lock();
+ map<off_t, librados::AioCompletion *>::iterator iter = completion_map.begin();
+ if (iter == completion_map.end()) {
+ *done = true;
+ lock.Unlock();
+ return 0;
+ }
+ off_t cur_ofs = iter->first;
+ librados::AioCompletion *c = iter->second;
+ lock.Unlock(); /* the wait below is performed without holding lock */
+
+ c->wait_for_complete_and_cb();
+ int r = c->get_return_value();
+ c->release();
+
+ lock.Lock();
+ completion_map.erase(cur_ofs); /* erased by key, since the map may have changed while unlocked */
+
+ if (completion_map.empty()) {
+ *done = true;
+ }
+ lock.Unlock();
+
+ return r;
+ }
+
+ void add_io(off_t ofs, off_t len, bufferlist **pbl, AioCompletion **pc) { /* register a read at ofs: returns its buffer via *pbl and a new completion via *pc, and takes one reference on this object */
+ Mutex::Locker l(lock);
+
+ get_obj_io& io = io_map[ofs];
+ *pbl = &io.bl;
+
+ struct get_obj_aio_data aio;
+ aio.ofs = ofs;
+ aio.len = len;
+ aio.op_data = this;
+
+ aio_data.push_back(aio);
+
+ struct get_obj_aio_data *paio_data = &aio_data.back(); /* last element */
+
+ librados::AioCompletion *c = librados::Rados::aio_create_completion((void *)paio_data, _get_obj_aio_completion_cb, NULL);
+ completion_map[ofs] = c;
+
+ *pc = c;
+
+ /* we have a reference per IO, plus one reference for the calling function.
+ * reference is dropped for each callback, plus when we're done iterating
+ * over the parts */
+ get();
+ }
+
+ void cancel_io(off_t ofs) { /* release and forget the completion and buffer registered at ofs, if any */
+ ldout(cct, 20) << "get_obj_data::cancel_io() ofs=" << ofs << dendl;
+ lock.Lock();
+ map<off_t, AioCompletion *>::iterator iter = completion_map.find(ofs);
+ if (iter != completion_map.end()) {
+ AioCompletion *c = iter->second;
+ c->release();
+ completion_map.erase(ofs);
+ io_map.erase(ofs);
+ }
+ lock.Unlock();
+
+ /* we don't drop a reference here -- e.g., not calling d->put(), because we still
+ * need IoCtx to live, as io callback may still be called
+ */
+ }
+
+ void cancel_all_io() { /* release every completion still in completion_map; NOTE(review): the released pointers stay in the map -- confirm nothing reads them afterwards */
+ ldout(cct, 20) << "get_obj_data::cancel_all_io()" << dendl;
+ Mutex::Locker l(lock);
+ for (map<off_t, librados::AioCompletion *>::iterator iter = completion_map.begin();
+ iter != completion_map.end(); ++iter) {
+ librados::AioCompletion *c = iter->second;
+ c->release();
+ }
+ }
+
+ int get_complete_ios(off_t ofs, list<bufferlist>& bl_list) { /* if the first buffered read is the one at ofs, move it and the completed reads that follow it into bl_list; returns 0 or a negative completion result */
+ Mutex::Locker l(lock);
+
+ map<off_t, get_obj_io>::iterator liter = io_map.begin();
+
+ if (liter == io_map.end() ||
+ liter->first != ofs) { /* nothing is collected unless ofs is the first entry of io_map */
+ return 0;
+ }
+
+ map<off_t, librados::AioCompletion *>::iterator aiter;
+ aiter = completion_map.find(ofs);
+ if (aiter == completion_map.end()) {
+ /* completion map does not hold this io, it was cancelled */
+ return 0;
+ }
+
+ AioCompletion *completion = aiter->second;
+ int r = completion->get_return_value();
+ if (r < 0)
+ return r;
+
+ for (; aiter != completion_map.end(); aiter++) { /* liter is advanced in step with aiter, i.e. both maps are assumed to hold the same keys from here on */
+ completion = aiter->second;
+ if (!completion->is_complete()) {
+ /* reached a request that is not yet complete, stop */
+ break;
+ }
+
+ r = completion->get_return_value();
+ if (r < 0) {
+ set_cancelled(r); /* mark it as cancelled, so that we don't continue processing next operations */
+ return r;
+ }
+
+ total_read += r;
+
+ map<off_t, get_obj_io>::iterator old_liter = liter++; /* step liter first so erasing old_liter does not invalidate it */
+ bl_list.push_back(old_liter->second.bl);
+ io_map.erase(old_liter);
+ }
+
+ return 0;
+ }
+};
+
+static int _get_obj_iterate_cb(rgw_obj& obj, off_t obj_ofs, off_t read_ofs, off_t len, bool is_head_obj, RGWObjState *astate, void *arg)
+{ /* plain-function adapter passed to iterate_obj(): arg is the get_obj_data, the call is forwarded to RGWRados::get_obj_iterate_cb() */
+ struct get_obj_data *d = (struct get_obj_data *)arg;
+
+ return d->rados->get_obj_iterate_cb(d->ctx, astate, obj, obj_ofs, read_ofs, len, is_head_obj, arg);
+}
+
+static void _get_obj_aio_completion_cb(completion_t cb, void *arg)
+{ /* plain-function adapter registered with aio_create_completion(): arg is a get_obj_aio_data, the call is forwarded to RGWRados::get_obj_aio_completion_cb() */
+ struct get_obj_aio_data *aio_data = (struct get_obj_aio_data *)arg;
+ struct get_obj_data *d = aio_data->op_data;
+
+ d->rados->get_obj_aio_completion_cb(cb, arg);
+}
+
+
+void RGWRados::get_obj_aio_completion_cb(completion_t c, void *arg)
+{ /* completion handler for one read: returns its throttle budget, forwards any data that is ready to the client callback, then drops the per-IO reference */
+ struct get_obj_aio_data *aio_data = (struct get_obj_aio_data *)arg;
+ struct get_obj_data *d = aio_data->op_data;
+ off_t ofs = aio_data->ofs;
+ off_t len = aio_data->len;
+
+ list<bufferlist> bl_list;
+ list<bufferlist>::iterator iter;
+ int r;
+
+ ldout(cct, 20) << "get_obj_aio_completion_cb: io completion ofs=" << ofs << " len=" << len << dendl;
+ d->throttle.put(len); /* give back what get_obj_iterate_cb() took with throttle.get(len) */
+
+ if (d->is_cancelled()) /* once cancelled, no data is forwarded; only the reference is dropped */
+ goto done;
+
+ d->data_lock.Lock();
+
+ r = d->get_complete_ios(ofs, bl_list); /* collects buffers only when ofs is the first pending read */
+ if (r < 0) {
+ goto done_unlock;
+ }
+
+ for (iter = bl_list.begin(); iter != bl_list.end(); ++iter) {
+ bufferlist& bl = *iter;
+ d->client_cb->handle_data(bl, 0, bl.length()); /* NOTE(review): the return value of handle_data() is ignored here */
+ }
+
+done_unlock:
+ d->data_lock.Unlock();
+done:
+ d->put(); /* matches the get() done in get_obj_data::add_io() */
+ return;
+}
+
+int RGWRados::get_obj_iterate_cb(void *ctx, RGWObjState *astate, /* per-chunk callback: serves the chunk from astate->data when possible, otherwise issues an asynchronous read for it */
+ rgw_obj& obj,
+ off_t obj_ofs,
+ off_t read_ofs, off_t len,
+ bool is_head_obj, void *arg)
+{
+ RGWRadosCtx *rctx = (RGWRadosCtx *)ctx;
+ ObjectReadOperation op;
+ struct get_obj_data *d = (struct get_obj_data *)arg;
+
+ if (is_head_obj) {
+ /* only when reading from the head object do we need to do the atomic test */
+ int r = append_atomic_test(rctx, obj, op, &astate);
+ if (r < 0)
+ return r;
+
+ if (astate &&
+ obj_ofs < astate->data.length()) { /* part of the requested range is already available in astate->data */
+ unsigned chunk_len = min((uint64_t)astate->data.length() - obj_ofs, (uint64_t)len);
+
+ d->data_lock.Lock();
+ d->client_cb->handle_data(astate->data, obj_ofs, chunk_len); /* NOTE(review): return value ignored */
+ d->data_lock.Unlock();
+
+ d->lock.Lock();
+ d->total_read += chunk_len;
+ d->lock.Unlock();
+
+ len -= chunk_len; /* skip what was just delivered; only the remainder is read below */
+ read_ofs += chunk_len;
+ obj_ofs += chunk_len;
+ if (!len)
+ return 0;
+ }
+ }
+
+ string oid, key;
+ rgw_bucket bucket;
+ get_obj_bucket_and_oid_key(obj, bucket, oid, key);
+
+ bufferlist *pbl;
+ AioCompletion *c;
+
+ d->add_io(obj_ofs, len, &pbl, &c); /* registers the read and takes a reference on d */
+
+ d->throttle.get(len); /* returned via throttle.put(len) in get_obj_aio_completion_cb() */
+ if (d->is_cancelled()) {
+ return d->get_err_code(); /* NOTE(review): the completion/reference from add_io() and the throttle budget are not undone on this path -- confirm this is intended */
+ }
+
+ ldout(cct, 20) << "rados->get_obj_iterate_cb oid=" << oid << " obj-ofs=" << obj_ofs << " read_ofs=" << read_ofs << " len=" << len << dendl;
+ op.read(read_ofs, len, pbl, NULL);
+
+ librados::IoCtx io_ctx(d->io_ctx); /* local copy, so the locator key set below applies to this request only */
+ io_ctx.locator_set_key(key);
+
+ int r = io_ctx.aio_operate(oid, c, &op, NULL);
+ ldout(cct, 20) << "rados->aio_operate r=" << r << " bl.length=" << pbl->length() << dendl;
+
+ if (r < 0) {
+ d->set_cancelled(r);
+ d->cancel_io(obj_ofs);
+ }
+
+ return r;
+}
+
+int RGWRados::get_obj_iterate(void *ctx, void **handle, rgw_obj& obj, /* read [ofs, end] of obj via iterate_obj(), delivering data through cb; returns the first negative result seen, otherwise the result of the last wait */
+ off_t ofs, off_t end,
+ RGWGetDataCB *cb)
+{
+ struct get_obj_data *data = new get_obj_data(cct);
+ bool done = false;
+
+ GetObjState *state = *(GetObjState **)handle; /* *handle is dereferenced unconditionally below */
+
+ data->rados = this;
+ data->ctx = ctx;
+ data->io_ctx.dup(state->io_ctx);
+ data->client_cb = cb;
+
+ int r = iterate_obj(ctx, obj, ofs, end, cct->_conf->rgw_get_obj_max_req_size, _get_obj_iterate_cb, (void *)data); /* issues the reads, at most rgw_get_obj_max_req_size bytes each */
+ if (r < 0) {
+ goto done;
+ }
+
+ while (!done) { /* wait for the outstanding reads one by one */
+ r = data->wait_next_io(&done);
+ if (r < 0) {
+ dout(10) << "get_obj_iterate() r=" << r << ", canceling all io" << dendl;
+ data->cancel_all_io();
+ break;
+ }
+ }
+
+done:
+ data->put(); /* drop this function's own reference to data */
+ return r;
+}
+
void RGWRados::finish_get_obj(void **handle)
{
if (*handle) {
@@ -2673,6 +2998,87 @@ void RGWRados::finish_get_obj(void **handle)
}
}
+/*
+ * Walk the byte range [ofs, end] of obj and call iterate_obj_cb once per
+ * chunk of at most max_chunk_size bytes.
+ *
+ * The callback receives: the rados object holding the chunk, the offset of
+ * the chunk within the logical object, the offset within that rados object,
+ * the chunk length, whether the chunk lives in the head object, the object
+ * state obtained from get_obj_state(), and arg.
+ *
+ * If ctx is NULL a temporary RGWRadosCtx is used for the duration of the
+ * call. It is released on every exit path; previously it was leaked when
+ * the function succeeded.
+ *
+ * Returns 0 on success, or the first negative value returned by
+ * get_obj_state() or by the callback.
+ */
+int RGWRados::iterate_obj(void *ctx, rgw_obj& obj,
+ off_t ofs, off_t end,
+ uint64_t max_chunk_size,
+ int (*iterate_obj_cb)(rgw_obj&, off_t, off_t, off_t, bool, RGWObjState *, void *),
+ void *arg)
+{
+ rgw_obj read_obj = obj;
+ uint64_t read_ofs = ofs;
+ uint64_t len;
+ RGWRadosCtx *rctx = (RGWRadosCtx *)ctx;
+ RGWRadosCtx *new_ctx = NULL;
+ bool reading_from_head = true;
+ RGWObjState *astate = NULL;
+
+ if (!rctx) {
+ /* no caller-supplied context: use a temporary one, freed at "done" */
+ new_ctx = new RGWRadosCtx(this);
+ rctx = new_ctx;
+ }
+
+ int r = get_obj_state(rctx, obj, &astate);
+ if (r < 0)
+ goto done;
+
+ if (end < 0)
+ len = 0;
+ else
+ len = end - ofs + 1;
+
+ if (astate->has_manifest) {
+ /* now get the relevant object part */
+ map<uint64_t, RGWObjManifestPart>::iterator iter = astate->manifest.objs.upper_bound(ofs);
+ /* we're now pointing at the next part (unless the first part starts at a higher ofs),
+ so retract to previous part */
+ if (iter != astate->manifest.objs.begin()) {
+ --iter;
+ }
+
+ for (; iter != astate->manifest.objs.end() && ofs <= end; ++iter) {
+ RGWObjManifestPart& part = iter->second;
+ off_t part_ofs = iter->first;
+ off_t next_part_ofs = part_ofs + part.size;
+
+ while (ofs < next_part_ofs && ofs <= end) {
+ read_obj = part.loc;
+ /* never read past the end of the current part or of the request */
+ uint64_t read_len = min(len, part.size - (ofs - part_ofs));
+ read_ofs = part.loc_ofs + (ofs - part_ofs);
+
+ if (read_len > max_chunk_size) {
+ read_len = max_chunk_size;
+ }
+
+ reading_from_head = (read_obj == obj);
+ r = iterate_obj_cb(read_obj, ofs, read_ofs, read_len, reading_from_head, astate, arg);
+ if (r < 0)
+ goto done;
+
+ len -= read_len;
+ ofs += read_len;
+ }
+ }
+ } else {
+ /* no manifest: all data is read from obj itself, at the same offsets */
+ while (ofs <= end) {
+ uint64_t read_len = min(len, max_chunk_size);
+
+ r = iterate_obj_cb(obj, ofs, ofs, read_len, reading_from_head, astate, arg);
+ if (r < 0)
+ goto done;
+
+ len -= read_len;
+ ofs += read_len;
+ }
+ }
+
+ /* success is always reported as 0, whatever the last callback returned */
+ r = 0;
+
+done:
+ /* shared by the success and error paths so the temporary context never leaks */
+ delete new_ctx;
+ return r;
+}
+
/* a simple object read */
int RGWRados::read(void *ctx, rgw_obj& obj, off_t ofs, size_t size, bufferlist& bl)
{
diff --git a/src/rgw/rgw_rados.h b/src/rgw/rgw_rados.h
index f86ef8cd833..3ae13c8524c 100644
--- a/src/rgw/rgw_rados.h
+++ b/src/rgw/rgw_rados.h
@@ -3,6 +3,7 @@
#include "include/rados/librados.hpp"
#include "include/Context.h"
+#include "common/RefCountedObj.h"
#include "rgw_common.h"
#include "cls/rgw/cls_rgw_types.h"
#include "rgw_log.h"
@@ -55,6 +56,12 @@ struct RGWUsageIter {
RGWUsageIter() : index(0) {}
};
+class RGWGetDataCB { /* callback interface passed to RGWRados::get_obj_iterate() to receive the data that was read */
+public:
+ virtual int handle_data(bufferlist& bl, off_t bl_ofs, off_t bl_len) = 0; /* called with a buffer plus the offset and length within it to consume */
+ virtual ~RGWGetDataCB() {}
+};
+
class RGWAccessListFilter {
public:
virtual ~RGWAccessListFilter() {}
@@ -625,7 +632,24 @@ public:
virtual void finish_get_obj(void **handle);
- /**
+ int iterate_obj(void *ctx, rgw_obj& obj,
+ off_t ofs, off_t end,
+ uint64_t max_chunk_size,
+ int (*iterate_obj_cb)(rgw_obj&, off_t, off_t, off_t, bool, RGWObjState *, void *),
+ void *arg);
+
+ int get_obj_iterate(void *ctx, void **handle, rgw_obj& obj,
+ off_t ofs, off_t end,
+ RGWGetDataCB *cb);
+
+ int get_obj_iterate_cb(void *ctx, RGWObjState *astate,
+ rgw_obj& obj,
+ off_t obj_ofs, off_t read_ofs, off_t len,
+ bool is_head_obj, void *arg);
+
+ void get_obj_aio_completion_cb(librados::completion_t cb, void *arg);
+
+ /**
* a simple object read without keeping state
*/
virtual int read(void *ctx, rgw_obj& obj, off_t ofs, size_t size, bufferlist& bl);
diff --git a/src/rgw/rgw_rest.cc b/src/rgw/rgw_rest.cc
index 72aab14c522..ab3927e7a62 100644
--- a/src/rgw/rgw_rest.cc
+++ b/src/rgw/rgw_rest.cc
@@ -684,8 +684,10 @@ static int read_all_chunked_input(req_state *s, char **pdata, int *plen)
int read_len = 0, len = 0;
do {
int r = s->cio->read(data + len, need_to_read, &read_len);
- if (r < 0)
+ if (r < 0) {
+ free(data);
return r;
+ }
len += read_len;
diff --git a/src/rgw/rgw_rest_s3.cc b/src/rgw/rgw_rest_s3.cc
index bdba0e9c8f4..f5a7281f5ba 100644
--- a/src/rgw/rgw_rest_s3.cc
+++ b/src/rgw/rgw_rest_s3.cc
@@ -9,6 +9,7 @@
#include "rgw_rest_s3.h"
#include "rgw_acl.h"
#include "rgw_policy_s3.h"
+#include "rgw_user.h"
#include "common/armor.h"
@@ -66,7 +67,7 @@ static struct response_attr_param resp_attr_params[] = {
{NULL, NULL},
};
-int RGWGetObj_ObjStore_S3::send_response_data(bufferlist& bl)
+int RGWGetObj_ObjStore_S3::send_response_data(bufferlist& bl, off_t bl_ofs, off_t bl_len)
{
const char *content_type = NULL;
string content_type_str;
@@ -148,7 +149,7 @@ done:
send_data:
if (get_data && !orig_ret) {
- int r = s->cio->write(bl.c_str(), len);
+ int r = s->cio->write(bl.c_str() + bl_ofs, bl_len);
if (r < 0)
return r;
}
@@ -279,7 +280,8 @@ void RGWStatBucket_ObjStore_S3::send_response()
int RGWCreateBucket_ObjStore_S3::get_params()
{
RGWAccessControlPolicy_S3 s3policy(s->cct);
- int r = s3policy.create_canned(s->user.user_id, s->user.display_name, s->canned_acl);
+
+ int r = s3policy.create_canned(s->owner, s->bucket_owner, s->canned_acl);
if (r < 0)
return r;
@@ -315,7 +317,7 @@ int RGWPutObj_ObjStore_S3::get_params()
if (!s->length)
return -ERR_LENGTH_REQUIRED;
- int r = s3policy.create_canned(s->user.user_id, s->user.display_name, s->canned_acl);
+ int r = s3policy.create_canned(s->owner, s->bucket_owner, s->canned_acl);
if (!r)
return -EINVAL;
@@ -898,6 +900,8 @@ int RGWPostObj_ObjStore_S3::get_policy()
}
s->user = user_info;
+ s->owner.set_id(user_info.user_id);
+ s->owner.set_name(user_info.display_name);
} else {
ldout(s->cct, 0) << "No attached policy found!" << dendl;
}
@@ -907,7 +911,7 @@ int RGWPostObj_ObjStore_S3::get_policy()
RGWAccessControlPolicy_S3 s3policy(s->cct);
ldout(s->cct, 20) << "canned_acl=" << canned_acl << dendl;
- if (!s3policy.create_canned(s->user.user_id, "", canned_acl)) {
+ if (!s3policy.create_canned(s->owner, s->bucket_owner, canned_acl)) {
err_msg = "Bad canned ACLs";
return -EINVAL;
}
@@ -1114,7 +1118,7 @@ int RGWCopyObj_ObjStore_S3::init_dest_policy()
RGWAccessControlPolicy_S3 s3policy(s->cct);
/* build a policy for the target object */
- ret = s3policy.create_canned(s->user.user_id, s->user.display_name, s->canned_acl);
+ ret = s3policy.create_canned(s->owner, s->bucket_owner, s->canned_acl);
if (!ret)
return -EINVAL;
@@ -1197,7 +1201,16 @@ void RGWGetACLs_ObjStore_S3::send_response()
int RGWPutACLs_ObjStore_S3::get_canned_policy(ACLOwner& owner, stringstream& ss)
{
RGWAccessControlPolicy_S3 s3policy(s->cct);
- bool r = s3policy.create_canned(owner.get_id(), owner.get_display_name(), s->canned_acl);
+
+ // bucket-* canned acls do not apply to bucket
+ if (s->object_str.empty()) {
+ if (s->canned_acl.find("bucket") != string::npos)
+ s->canned_acl.clear();
+ }
+
+ bool r;
+ r = s3policy.create_canned(owner, s->bucket_owner, s->canned_acl);
+
if (!r)
return -EINVAL;
@@ -1218,7 +1231,7 @@ void RGWPutACLs_ObjStore_S3::send_response()
int RGWInitMultipart_ObjStore_S3::get_params()
{
RGWAccessControlPolicy_S3 s3policy(s->cct);
- ret = s3policy.create_canned(s->user.user_id, s->user.display_name, s->canned_acl);
+ ret = s3policy.create_canned(s->owner, s->bucket_owner, s->canned_acl);
if (!ret)
return -EINVAL;
@@ -1868,6 +1881,10 @@ int RGW_Auth_S3::authorize(RGWRados *store, struct req_state *s)
return -EPERM;
}
+ // populate the owner info
+ s->owner.set_id(s->user.user_id);
+ s->owner.set_name(s->user.display_name);
+
/* now verify signature */
string auth_hdr;
diff --git a/src/rgw/rgw_rest_s3.h b/src/rgw/rgw_rest_s3.h
index daa8037f065..dc38077fc3e 100644
--- a/src/rgw/rgw_rest_s3.h
+++ b/src/rgw/rgw_rest_s3.h
@@ -17,7 +17,7 @@ public:
RGWGetObj_ObjStore_S3() {}
~RGWGetObj_ObjStore_S3() {}
- int send_response_data(bufferlist& bl);
+ int send_response_data(bufferlist& bl, off_t ofs, off_t len);
};
class RGWListBuckets_ObjStore_S3 : public RGWListBuckets_ObjStore {
@@ -161,6 +161,7 @@ public:
~RGWPutACLs_ObjStore_S3() {}
int get_canned_policy(ACLOwner& owner, stringstream& ss);
+
void send_response();
};
diff --git a/src/rgw/rgw_rest_swift.cc b/src/rgw/rgw_rest_swift.cc
index 34a73633f9a..28749aafe42 100644
--- a/src/rgw/rgw_rest_swift.cc
+++ b/src/rgw/rgw_rest_swift.cc
@@ -440,7 +440,7 @@ void RGWCopyObj_ObjStore_SWIFT::send_response()
end_header(s);
}
-int RGWGetObj_ObjStore_SWIFT::send_response_data(bufferlist& bl)
+int RGWGetObj_ObjStore_SWIFT::send_response_data(bufferlist& bl, off_t bl_ofs, off_t bl_len)
{
const char *content_type = NULL;
int orig_ret = ret;
@@ -503,7 +503,7 @@ int RGWGetObj_ObjStore_SWIFT::send_response_data(bufferlist& bl)
send_data:
if (get_data && !orig_ret) {
- int r = s->cio->write(bl.c_str(), len);
+ int r = s->cio->write(bl.c_str() + bl_ofs, bl_len);
if (r < 0)
return r;
}
@@ -767,7 +767,7 @@ int RGWHandler_ObjStore_SWIFT::init(RGWRados *store, struct req_state *s, RGWCli
int ret = validate_bucket_name(s->bucket_name_str.c_str());
if (ret)
return ret;
- ret = validate_object_name(s->object_str.c_str());
+ ret = validate_object_name(s->object_str);
if (ret)
return ret;
diff --git a/src/rgw/rgw_rest_swift.h b/src/rgw/rgw_rest_swift.h
index 1735d151f44..1704823581b 100644
--- a/src/rgw/rgw_rest_swift.h
+++ b/src/rgw/rgw_rest_swift.h
@@ -10,7 +10,7 @@ public:
RGWGetObj_ObjStore_SWIFT() {}
~RGWGetObj_ObjStore_SWIFT() {}
- int send_response_data(bufferlist& bl);
+ int send_response_data(bufferlist& bl, off_t ofs, off_t len);
};
class RGWListBuckets_ObjStore_SWIFT : public RGWListBuckets_ObjStore {
diff --git a/src/rgw/rgw_usage.cc b/src/rgw/rgw_usage.cc
index 054abc2c334..f5016913b68 100644
--- a/src/rgw/rgw_usage.cc
+++ b/src/rgw/rgw_usage.cc
@@ -14,7 +14,7 @@ static void dump_usage_categories_info(Formatter *formatter, const rgw_usage_log
formatter->open_array_section("categories");
map<string, rgw_usage_data>::const_iterator uiter;
for (uiter = entry.usage_map.begin(); uiter != entry.usage_map.end(); ++uiter) {
- if (categories && categories->size() && !categories->count(uiter->first))
+ if (categories && !categories->empty() && !categories->count(uiter->first))
continue;
const rgw_usage_data& usage = uiter->second;
formatter->open_object_section("entry");
diff --git a/src/rgw/rgw_user.cc b/src/rgw/rgw_user.cc
index f05f594d321..e4dbb56319e 100644
--- a/src/rgw/rgw_user.cc
+++ b/src/rgw/rgw_user.cc
@@ -56,7 +56,7 @@ int rgw_store_user_info(RGWRados *store, RGWUserInfo& info, RGWUserInfo *old_inf
}
}
- if (info.access_keys.size()) {
+ if (!info.access_keys.empty()) {
/* check if access keys already exist */
RGWUserInfo inf;
map<string, RGWAccessKey>::iterator iter = info.access_keys.begin();
@@ -95,7 +95,7 @@ int rgw_store_user_info(RGWRados *store, RGWUserInfo& info, RGWUserInfo *old_inf
}
}
- if (info.access_keys.size()) {
+ if (!info.access_keys.empty()) {
map<string, RGWAccessKey>::iterator iter = info.access_keys.begin();
for (; iter != info.access_keys.end(); ++iter) {
RGWAccessKey& k = iter->second;
diff --git a/src/rgw/rgw_xml.cc b/src/rgw/rgw_xml.cc
index 4347b06115c..eee69d026ba 100644
--- a/src/rgw/rgw_xml.cc
+++ b/src/rgw/rgw_xml.cc
@@ -209,9 +209,16 @@ bool RGWXMLParser::init()
bool RGWXMLParser::parse(const char *_buf, int len, int done)
{
int pos = buf_len;
- buf = (char *)realloc(buf, buf_len + len);
- if (!buf)
+ char *tmp_buf;
+ tmp_buf = (char *)realloc(buf, buf_len + len);
+ if (tmp_buf == NULL){
+ free(buf);
+ buf = NULL;
return false;
+ } else {
+ buf = tmp_buf;
+ }
+
memcpy(&buf[buf_len], _buf, len);
buf_len += len;
diff --git a/src/scratchtoolpp.cc b/src/scratchtoolpp.cc
index 01db29e9f2b..62096920300 100644
--- a/src/scratchtoolpp.cc
+++ b/src/scratchtoolpp.cc
@@ -109,7 +109,7 @@ int main(int argc, const char **argv)
uint64_t stat_size;
time_t stat_mtime;
r = io_ctx.stat(oid, &stat_size, &stat_mtime);
- cout << "io_ctx.stat size = " << stat_size << " mtime = " << stat_mtime << std::endl;
+ cout << "io_ctx.stat returned " << r << " size = " << stat_size << " mtime = " << stat_mtime << std::endl;
r = io_ctx.stat(oid, NULL, NULL);
cout << "io_ctx.stat(does_not_exist) = " << r;
@@ -205,8 +205,9 @@ int main(int argc, const char **argv)
cout << "sha1 result=" << sha1_str << std::endl;
r = io_ctx.exec(oid, "acl", "set", bl, bl2);
+ cout << "exec (set) returned " << r << std::endl;
r = io_ctx.exec(oid, "acl", "get", bl, bl2);
- cout << "exec returned " << r << std::endl;
+ cout << "exec (get) returned " << r << std::endl;
if (bl2.length() > 0) {
cout << "attr=" << bl2.c_str() << std::endl;
}
diff --git a/src/test/ObjectMap/test_keyvaluedb_iterators.cc b/src/test/ObjectMap/test_keyvaluedb_iterators.cc
index e5c9089916c..aa63e1a2de4 100644
--- a/src/test/ObjectMap/test_keyvaluedb_iterators.cc
+++ b/src/test/ObjectMap/test_keyvaluedb_iterators.cc
@@ -147,7 +147,7 @@ public:
void validate_prefix(KeyValueDB::WholeSpaceIterator iter,
string &prefix, deque<string> &keys) {
- while (keys.size() > 0) {
+ while (!keys.empty()) {
ASSERT_TRUE(iter->valid());
string expected_key = keys.front();
keys.pop_front();
@@ -170,7 +170,7 @@ public:
void validate_prefix_backwards(KeyValueDB::WholeSpaceIterator iter,
string &prefix, deque<string> &keys) {
- while (keys.size() > 0) {
+ while (!keys.empty()) {
ASSERT_TRUE(iter->valid());
string expected_key = keys.front();
keys.pop_front();
diff --git a/src/test/ObjectMap/test_object_map.cc b/src/test/ObjectMap/test_object_map.cc
index e536be3b847..e26b0e2c31c 100644
--- a/src/test/ObjectMap/test_object_map.cc
+++ b/src/test/ObjectMap/test_object_map.cc
@@ -125,7 +125,7 @@ public:
to_get.insert(key);
map<string, bufferlist> got;
db->get_xattrs(hoid, to_get, &got);
- if (got.size()) {
+ if (!got.empty()) {
*value = string(got.begin()->second.c_str(),
got.begin()->second.length());
return 1;
@@ -145,7 +145,7 @@ public:
to_get.insert(key);
map<string, bufferlist> got;
db->get_values(hoid, to_get, &got);
- if (got.size()) {
+ if (!got.empty()) {
*value = string(got.begin()->second.c_str(),
got.begin()->second.length());
return 1;
diff --git a/src/test/bufferlist.cc b/src/test/bufferlist.cc
index 4dafee8cd34..50508d0085e 100644
--- a/src/test/bufferlist.cc
+++ b/src/test/bufferlist.cc
@@ -1,14 +1,1766 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2013 Cloudwatt <libre.licensing@cloudwatt.com>
+ *
+ * Author: Loic Dachary <loic@dachary.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Library Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Library Public License for more details.
+ *
+ */
+
#include <tr1/memory>
+#include <limits.h>
+#include <errno.h>
+#include <sys/uio.h>
#include "include/buffer.h"
#include "include/encoding.h"
+#include "common/environment.h"
#include "gtest/gtest.h"
#include "stdlib.h"
-
+#include "fcntl.h"
+#include "sys/stat.h"
#define MAX_TEST 1000000
+TEST(Buffer, constructors) {
+  // Allocation totals are asserted only when CEPH_BUFFER_TRACK is set in the
+  // environment; each section expects the total to be zero on entry.
+  const bool track_alloc = get_env_bool("CEPH_BUFFER_TRACK");
+  const unsigned size = 17;
+
+  // buffer::create
+  if (track_alloc) {
+    EXPECT_EQ(0, buffer::get_total_alloc());
+  }
+  {
+    bufferptr bp(buffer::create(size));
+    EXPECT_EQ(size, bp.length());
+    if (track_alloc) {
+      EXPECT_EQ(size, (unsigned)buffer::get_total_alloc());
+    }
+  }
+
+  // buffer::claim_char: the ptr is expected to expose the very same memory.
+  if (track_alloc) {
+    EXPECT_EQ(0, buffer::get_total_alloc());
+  }
+  {
+    char* chars = new char[size];
+    ::memset(chars, 'X', size);
+    bufferptr bp(buffer::claim_char(size, chars));
+    if (track_alloc) {
+      EXPECT_EQ(size, (unsigned)buffer::get_total_alloc());
+    }
+    EXPECT_EQ(size, bp.length());
+    EXPECT_EQ(chars, bp.c_str());
+    bufferptr dup = bp.clone();
+    EXPECT_EQ(0, ::memcmp(dup.c_str(), bp.c_str(), size));
+  }
+
+  // buffer::create_static: expected not to count towards the allocation
+  // total; the test releases the backing array itself.
+  if (track_alloc) {
+    EXPECT_EQ(0, buffer::get_total_alloc());
+  }
+  {
+    char* chars = new char[size];
+    bufferptr bp(buffer::create_static(size, chars));
+    if (track_alloc) {
+      EXPECT_EQ(0, buffer::get_total_alloc());
+    }
+    EXPECT_EQ(size, bp.length());
+    EXPECT_EQ(chars, bp.c_str());
+    delete [] chars;
+  }
+
+  // buffer::create_malloc
+  if (track_alloc) {
+    EXPECT_EQ(0, buffer::get_total_alloc());
+  }
+  {
+    bufferptr bp(buffer::create_malloc(size));
+    if (track_alloc) {
+      EXPECT_EQ(size, (unsigned)buffer::get_total_alloc());
+    }
+    EXPECT_EQ(size, bp.length());
+    // this doesn't throw on my x86_64 wheezy box --sage
+    //EXPECT_THROW(buffer::create_malloc((unsigned)ULLONG_MAX), buffer::bad_alloc);
+  }
+
+  // buffer::claim_malloc: the ptr is expected to expose the malloc'ed memory.
+  if (track_alloc) {
+    EXPECT_EQ(0, buffer::get_total_alloc());
+  }
+  {
+    char* chars = (char*)malloc(size);
+    ::memset(chars, 'X', size);
+    bufferptr bp(buffer::claim_malloc(size, chars));
+    if (track_alloc) {
+      EXPECT_EQ(size, (unsigned)buffer::get_total_alloc());
+    }
+    EXPECT_EQ(size, bp.length());
+    EXPECT_EQ(chars, bp.c_str());
+    bufferptr dup = bp.clone();
+    EXPECT_EQ(0, ::memcmp(dup.c_str(), bp.c_str(), size));
+  }
+
+  // buffer::copy: same bytes, different address.
+  if (track_alloc) {
+    EXPECT_EQ(0, buffer::get_total_alloc());
+  }
+  {
+    const std::string pattern(size, 'X');
+    bufferptr bp(buffer::copy(pattern.c_str(), pattern.size()));
+    if (track_alloc) {
+      EXPECT_EQ(size, (unsigned)buffer::get_total_alloc());
+    }
+    EXPECT_NE(pattern.c_str(), bp.c_str());
+    EXPECT_EQ(0, ::memcmp(pattern.c_str(), bp.c_str(), size));
+  }
+
+  // buffer::create_page_aligned
+  if (track_alloc) {
+    EXPECT_EQ(0, buffer::get_total_alloc());
+  }
+  {
+    bufferptr bp(buffer::create_page_aligned(size));
+    ::memset(bp.c_str(), 'X', size);
+    if (track_alloc) {
+      EXPECT_EQ(size, (unsigned)buffer::get_total_alloc());
+    }
+    // doesn't throw on my x86_64 wheezy box --sage
+    //EXPECT_THROW(buffer::create_page_aligned((unsigned)ULLONG_MAX), buffer::bad_alloc);
+#ifndef DARWIN
+    ASSERT_TRUE(bp.is_page_aligned());
+#endif // DARWIN
+    bufferptr dup = bp.clone();
+    EXPECT_EQ(0, ::memcmp(dup.c_str(), bp.c_str(), size));
+  }
+
+  // Everything created above must have been released by now.
+  if (track_alloc) {
+    EXPECT_EQ(0, buffer::get_total_alloc());
+  }
+}
+
+TEST(BufferRaw, ostream) {
+  // Streaming a raw must mention its type tag and its length / refcount.
+  bufferptr bp(1);
+  std::ostringstream out;
+  out << *bp.get_raw();
+  const std::string text = out.str();
+  EXPECT_GT(text.size(), text.find("buffer::raw("));
+  EXPECT_GT(text.size(), text.find("len 1 nref 1)"));
+}
+
+//
+// +-----------+                +-----+
+// |           |                |     |
+// |  offset   +----------------+     |
+// |           |                |     |
+// |  length   +----            |     |
+// |           |    \-------    |     |
+// +-----------+            \---+     |
+// |   ptr     |                +-----+
+// +-----------+                | raw |
+//                              +-----+
+//
+TEST(BufferPtr, constructors) {
+  const unsigned size = 17;
+
+  // ptr::ptr(): no raw attached, zero offset and length.
+  {
+    buffer::ptr bp;
+    EXPECT_FALSE(bp.have_raw());
+    EXPECT_EQ(0u, bp.offset());
+    EXPECT_EQ(0u, bp.length());
+  }
+
+  // ptr::ptr(raw *r): spans the whole raw and holds the only reference.
+  {
+    bufferptr bp(buffer::create(size));
+    EXPECT_TRUE(bp.have_raw());
+    EXPECT_EQ(0u, bp.offset());
+    EXPECT_EQ(size, bp.length());
+    EXPECT_EQ(bp.raw_length(), bp.length());
+    EXPECT_EQ(1, bp.raw_nref());
+  }
+
+  // ptr::ptr(unsigned l)
+  {
+    bufferptr bp(size);
+    EXPECT_TRUE(bp.have_raw());
+    EXPECT_EQ(0u, bp.offset());
+    EXPECT_EQ(size, bp.length());
+    EXPECT_EQ(1, bp.raw_nref());
+  }
+
+  // ptr(const char *d, unsigned l): content is taken from the given bytes.
+  {
+    const std::string fill(size, 'X');
+    bufferptr bp(fill.c_str(), size);
+    EXPECT_TRUE(bp.have_raw());
+    EXPECT_EQ(0u, bp.offset());
+    EXPECT_EQ(size, bp.length());
+    EXPECT_EQ(1, bp.raw_nref());
+    EXPECT_EQ(0, ::memcmp(fill.c_str(), bp.c_str(), size));
+  }
+
+  // ptr(const ptr& p): both ptrs share one raw, so the refcount is 2.
+  {
+    const std::string fill(size, 'X');
+    bufferptr source(fill.c_str(), size);
+    bufferptr bp(source);
+    EXPECT_TRUE(bp.have_raw());
+    EXPECT_EQ(source.get_raw(), bp.get_raw());
+    EXPECT_EQ(2, bp.raw_nref());
+    EXPECT_EQ(0, ::memcmp(source.c_str(), bp.c_str(), size));
+  }
+
+  // ptr(const ptr& p, unsigned o, unsigned l): shares the raw; a range past
+  // the source length, or a source without raw, must trip an assertion.
+  {
+    const std::string fill(size, 'X');
+    bufferptr source(fill.c_str(), size);
+    bufferptr bp(source, 0, 0);
+    EXPECT_TRUE(bp.have_raw());
+    EXPECT_EQ(source.get_raw(), bp.get_raw());
+    EXPECT_EQ(2, bp.raw_nref());
+    EXPECT_EQ(0, ::memcmp(source.c_str(), bp.c_str(), size));
+    EXPECT_THROW(bufferptr(source, 0, source.length() + 1), FailedAssertion);
+    EXPECT_THROW(bufferptr(bufferptr(), 0, 0), FailedAssertion);
+  }
+}
+
+TEST(BufferPtr, assignment) {
+  const unsigned size = 17;
+
+  // Assigning over a ptr that already references the same raw.
+  {
+    bufferptr source(size);
+    bufferptr sharing(source.get_raw());
+    const unsigned off = 5;
+    const unsigned span = size - off;
+    source.set_offset(off);
+    source.set_length(span);
+    sharing = source;
+    ASSERT_EQ(2, source.raw_nref());
+    ASSERT_EQ(sharing.get_raw(), source.get_raw());
+    ASSERT_EQ(sharing.offset(), source.offset());
+    ASSERT_EQ(sharing.length(), source.length());
+  }
+
+  // Self assignment must change nothing.
+  {
+    bufferptr source(size);
+    source = source;
+    ASSERT_EQ(1, source.raw_nref());
+    ASSERT_EQ(0u, source.offset());
+    ASSERT_EQ(size, source.length());
+  }
+
+  // Assigning to an empty ptr makes it reference the same raw and range.
+  {
+    bufferptr source(size);
+    const unsigned off = 5;
+    const unsigned span = size - off;
+    source.set_offset(off);
+    source.set_length(span);
+    bufferptr target;
+    target = source;
+    ASSERT_EQ(2, source.raw_nref());
+    ASSERT_EQ(target.get_raw(), source.get_raw());
+    ASSERT_EQ(source.offset(), target.offset());
+    ASSERT_EQ(source.length(), target.length());
+  }
+}
+
+TEST(BufferPtr, clone) {
+  // A clone must carry the same bytes as the ptr it was made from.
+  const unsigned size = 17;
+  bufferptr source(size);
+  ::memset(source.c_str(), 'X', size);
+  bufferptr dup = source.clone();
+  EXPECT_EQ(0, ::memcmp(dup.c_str(), source.c_str(), size));
+}
+
+TEST(BufferPtr, swap) {
+  const unsigned size = 17;
+
+  // First ptr: filled with 'X', window 4~3.
+  bufferptr lhs(size);
+  ::memset(lhs.c_str(), 'X', size);
+  const unsigned lhs_off = 4;
+  lhs.set_offset(lhs_off);
+  const unsigned lhs_len = 3;
+  lhs.set_length(lhs_len);
+
+  // Second ptr: filled with 'Y', window 5~7.
+  bufferptr rhs(size);
+  ::memset(rhs.c_str(), 'Y', size);
+  const unsigned rhs_off = 5;
+  rhs.set_offset(rhs_off);
+  const unsigned rhs_len = 7;
+  rhs.set_length(rhs_len);
+
+  lhs.swap(rhs);
+
+  // Content, offset and length must all have traded places.
+  EXPECT_EQ(rhs_len, lhs.length());
+  EXPECT_EQ(rhs_off, lhs.offset());
+  EXPECT_EQ('Y', lhs[0]);
+
+  EXPECT_EQ(lhs_len, rhs.length());
+  EXPECT_EQ(lhs_off, rhs.offset());
+  EXPECT_EQ('X', rhs[0]);
+}
+
+TEST(BufferPtr, release) {
+  // The refcount rises while a second ptr exists and drops when it goes
+  // out of scope.
+  const unsigned size = 17;
+
+  bufferptr keeper(size);
+  {
+    bufferptr temporary(keeper);
+    EXPECT_EQ(2, keeper.raw_nref());
+  }
+  EXPECT_EQ(1, keeper.raw_nref());
+}
+
+TEST(BufferPtr, have_raw) {
+  // Default-constructed: no raw.
+  {
+    bufferptr bp;
+    EXPECT_FALSE(bp.have_raw());
+  }
+  // Constructed with a length: a raw is attached.
+  {
+    bufferptr bp(1);
+    EXPECT_TRUE(bp.have_raw());
+  }
+}
+
+TEST(BufferPtr, at_buffer_head) {
+  // True at offset 0, false once the offset is moved forward.
+  bufferptr bp(2);
+  EXPECT_TRUE(bp.at_buffer_head());
+  bp.set_offset(1);
+  EXPECT_FALSE(bp.at_buffer_head());
+}
+
+TEST(BufferPtr, at_buffer_tail) {
+  // True while the ptr covers the whole raw, false once it is shortened.
+  bufferptr bp(2);
+  EXPECT_TRUE(bp.at_buffer_tail());
+  bp.set_length(1);
+  EXPECT_FALSE(bp.at_buffer_tail());
+}
+
+TEST(BufferPtr, is_n_page_sized) {
+  // Exactly one page long.
+  {
+    bufferptr bp(CEPH_PAGE_SIZE);
+    EXPECT_TRUE(bp.is_n_page_sized());
+  }
+  // A single byte is not a whole number of pages.
+  {
+    bufferptr bp(1);
+    EXPECT_FALSE(bp.is_n_page_sized());
+  }
+}
+
+TEST(BufferPtr, accessors) {
+  // Exercises the read accessors of bufferptr on a ptr with a raw (`ptr`),
+  // on a const copy sharing that raw (`const_ptr`), and on ptrs without raw.
+  unsigned len = 17;
+  bufferptr ptr(len);
+  ptr.c_str()[0] = 'X';
+  ptr[1] = 'Y';
+  const bufferptr const_ptr(ptr);
+
+  EXPECT_NE((void*)NULL, (void*)ptr.get_raw());
+  EXPECT_EQ('X', ptr.c_str()[0]);
+  // c_str() and operator[] must assert when there is no raw.
+  {
+    bufferptr no_raw;
+    EXPECT_THROW(no_raw.c_str(), FailedAssertion);
+    EXPECT_THROW(no_raw[0], FailedAssertion);
+  }
+  EXPECT_EQ('X', const_ptr.c_str()[0]);
+  {
+    const bufferptr const_no_raw;
+    EXPECT_THROW(const_no_raw.c_str(), FailedAssertion);
+    EXPECT_THROW(const_no_raw[0], FailedAssertion);
+  }
+  EXPECT_EQ(len, const_ptr.length());
+  EXPECT_EQ((unsigned)0, const_ptr.offset());
+  EXPECT_EQ((unsigned)0, const_ptr.start());
+  EXPECT_EQ(len, const_ptr.end());
+  // Shortening the ptr by one byte leaves one unused byte at the tail.
+  {
+    bufferptr shortened(len);
+    unsigned unused = 1;
+    shortened.set_length(shortened.length() - unused);
+    EXPECT_EQ(unused, shortened.unused_tail_length());
+  }
+  {
+    bufferptr no_raw;
+    EXPECT_EQ((unsigned)0, no_raw.unused_tail_length());
+  }
+  // Indexing one past the end must assert.
+  EXPECT_THROW(ptr[len], FailedAssertion);
+  EXPECT_THROW(const_ptr[len], FailedAssertion);
+  // The raw_* accessors must assert when there is no raw.
+  {
+    const bufferptr const_no_raw;
+    EXPECT_THROW(const_no_raw.raw_c_str(), FailedAssertion);
+    EXPECT_THROW(const_no_raw.raw_length(), FailedAssertion);
+    EXPECT_THROW(const_no_raw.raw_nref(), FailedAssertion);
+  }
+  EXPECT_NE((const char *)NULL, const_ptr.raw_c_str());
+  EXPECT_EQ(len, const_ptr.raw_length());
+  // `ptr` and `const_ptr` share the raw.
+  EXPECT_EQ(2, const_ptr.raw_nref());
+  // One byte skipped at the head plus one dropped at the tail are wasted.
+  {
+    bufferptr trimmed(len);
+    unsigned wasted = 1;
+    trimmed.set_length(trimmed.length() - wasted * 2);
+    trimmed.set_offset(wasted);
+    EXPECT_EQ(wasted * 2, trimmed.wasted());
+  }
+}
+
+TEST(BufferPtr, cmp) {
+  // cmp() is only required to return a value <= -1, == 0 or >= 1.
+  bufferptr none;
+  bufferptr a("A", 1);
+  bufferptr ab("AB", 2);
+  bufferptr af("AF", 2);
+  bufferptr acc("ACC", 3);
+
+  // A ptr without content sorts before a non-empty one.
+  EXPECT_GE(-1, none.cmp(a));
+  EXPECT_LE(1, a.cmp(none));
+  // A prefix sorts before the longer string.
+  EXPECT_GE(-1, a.cmp(ab));
+  EXPECT_LE(1, ab.cmp(a));
+  // Identical content compares equal.
+  EXPECT_EQ(0, ab.cmp(ab));
+  // Same length, differing in the second byte.
+  EXPECT_GE(-1, ab.cmp(af));
+  EXPECT_LE(1, af.cmp(ab));
+  // Different lengths, differing in the second byte.
+  EXPECT_GE(-1, acc.cmp(af));
+  EXPECT_LE(1, af.cmp(acc));
+}
+
+TEST(BufferPtr, is_zero) {
+  char bytes[2] = { '\0', 'X' };
+  // Both bytes visible: the 'X' makes it non-zero.
+  {
+    const bufferptr bp(buffer::create_static(2, bytes));
+    EXPECT_FALSE(bp.is_zero());
+  }
+  // Only the leading NUL visible.
+  {
+    const bufferptr bp(buffer::create_static(1, bytes));
+    EXPECT_TRUE(bp.is_zero());
+  }
+}
+
+TEST(BufferPtr, copy_out) {
+  // Without a raw, copy_out must assert.
+  {
+    const bufferptr bp;
+    EXPECT_THROW(bp.copy_out(0u, 0u, NULL), FailedAssertion);
+  }
+  // Reading past the end throws end_of_buffer; an in-range read copies bytes.
+  {
+    char in[] = "ABC";
+    const size_t in_len = strlen(in);
+    const bufferptr bp(buffer::create_static(in_len, in));
+    EXPECT_THROW(bp.copy_out(0u, in_len + 1, NULL), buffer::end_of_buffer);
+    EXPECT_THROW(bp.copy_out(in_len + 1, 0u, NULL), buffer::end_of_buffer);
+    char out[1] = { 'X' };
+    bp.copy_out(1u, 1u, out);
+    EXPECT_EQ('B', out[0]);
+  }
+}
+
+TEST(BufferPtr, copy_in) {
+  // Without a raw, copy_in must assert.
+  {
+    bufferptr bp;
+    EXPECT_THROW(bp.copy_in(0u, 0u, NULL), FailedAssertion);
+  }
+  // Out-of-range requests assert; an in-range request overwrites the bytes.
+  {
+    char in[] = "ABCD";
+    const size_t in_len = strlen(in);
+    bufferptr bp(2);
+    EXPECT_THROW(bp.copy_in(0u, in_len + 1, NULL), FailedAssertion);
+    EXPECT_THROW(bp.copy_in(in_len + 1, 0u, NULL), FailedAssertion);
+    bp.copy_in(0u, 2u, in);
+    EXPECT_EQ(in[0], bp[0]);
+    EXPECT_EQ(in[1], bp[1]);
+  }
+}
+
+TEST(BufferPtr, append) {
+  // Without a raw, both append overloads must assert.
+  {
+    bufferptr bp;
+    EXPECT_THROW(bp.append('A'), FailedAssertion);
+    EXPECT_THROW(bp.append("B", 1u), FailedAssertion);
+  }
+  // A full ptr has no room; after resetting the length the appends land
+  // one after the other.
+  {
+    bufferptr bp(2);
+    EXPECT_THROW(bp.append('A'), FailedAssertion);
+    EXPECT_THROW(bp.append("B", 1u), FailedAssertion);
+    bp.set_length(0);
+    bp.append('A');
+    EXPECT_EQ(1u, bp.length());
+    EXPECT_EQ('A', bp[0]);
+    bp.append("B", 1u);
+    EXPECT_EQ(2u, bp.length());
+    EXPECT_EQ('B', bp[1]);
+  }
+}
+
+TEST(BufferPtr, zero) {
+  char bytes[] = "XXXX";
+  bufferptr bp(buffer::create_static(strlen(bytes), bytes));
+  // An offset past the end must assert.
+  EXPECT_THROW(bp.zero(bp.length() + 1, 0), FailedAssertion);
+  // zero(1, 1) clears only the second byte.
+  bp.zero(1, 1);
+  EXPECT_EQ('X', bp[0]);
+  EXPECT_EQ('\0', bp[1]);
+  EXPECT_EQ('X', bp[2]);
+  // zero() without arguments clears the first byte as well.
+  bp.zero();
+  EXPECT_EQ('\0', bp[0]);
+}
+
+TEST(BufferPtr, ostream) {
+  // A ptr without raw is printed with a "no raw" marker.
+  {
+    bufferptr bp;
+    std::ostringstream out;
+    out << bp;
+    const std::string text = out.str();
+    EXPECT_GT(text.size(), text.find("buffer:ptr(0~0 no raw"));
+  }
+  // A ptr with a raw prints the raw's length and refcount.
+  {
+    char bytes[] = "XXXX";
+    bufferptr bp(buffer::create_static(strlen(bytes), bytes));
+    std::ostringstream out;
+    out << bp;
+    const std::string text = out.str();
+    EXPECT_GT(text.size(), text.find("len 4 nref 1)"));
+  }
+}
+
+//
+// +---------+
+// | +-----+ |
+// list ptr | | | |
+// +----------+ +-----+ | | | |
+// | append_ >-------> >--------------------> | |
+// | buffer | +-----+ | | | |
+// +----------+ ptr | | | |
+// | _len | list +-----+ | | | |
+// +----------+ +------+ ,--->+ >-----> | |
+// | _buffers >----> >----- +-----+ | +-----+ |
+// +----------+ +----^-+ \ ptr | raw |
+// | last_p | / `-->+-----+ | +-----+ |
+// +--------+-+ / + >-----> | |
+// | ,- ,--->+-----+ | | | |
+// | / ,--- | | | |
+// | / ,--- | | | |
+// +-v--+-^--+--^+-------+ | | | |
+// | bl | ls | p | p_off >--------------->| | |
+// +----+----+-----+-----+ | +-----+ |
+// | | off >------------->| raw |
+// +---------------+-----+ | |
+// iterator +---------+
+//
+TEST(BufferListIterator, constructors) {
+  // iterator(): starts at offset 0.
+  {
+    buffer::list::iterator it;
+    EXPECT_EQ(0u, it.get_off());
+  }
+
+  // iterator(list *l, unsigned o=0)
+  {
+    bufferlist bl;
+    bl.append("ABC", 3);
+
+    {
+      bufferlist::iterator it(&bl);
+      EXPECT_EQ(0u, it.get_off());
+      EXPECT_EQ('A', *it);
+    }
+    {
+      bufferlist::iterator it(&bl, 1);
+      EXPECT_EQ('B', *it);
+      EXPECT_EQ(2u, it.get_remaining());
+    }
+  }
+
+  // iterator(list *l, unsigned o, std::list<ptr>::iterator ip, unsigned po)
+  // not tested because of http://tracker.ceph.com/issues/4101
+
+  // iterator(const iterator& other): the copy moves independently of the
+  // original, and both read the list's current bytes.
+  {
+    bufferlist bl;
+    bl.append("ABC", 3);
+    bufferlist::iterator first(&bl, 1);
+    bufferlist::iterator second(first);
+    EXPECT_EQ(*first, *second);
+    ++second;
+    EXPECT_NE(*first, *second);
+    EXPECT_EQ('B', *first);
+    EXPECT_EQ('C', *second);
+    bl.c_str()[1] = 'X';
+    second.advance(-1);
+    EXPECT_EQ('X', *second);
+  }
+}
+
+TEST(BufferListIterator, operator_equal) {
+  bufferlist bl;
+  bl.append("ABC", 3);
+  bufferlist::iterator source(&bl, 1);
+
+  // Self assignment keeps the position.
+  source = source;
+  EXPECT_EQ('B', *source);
+  // Assignment to a default-constructed iterator copies the position.
+  bufferlist::iterator target;
+  target = source;
+  EXPECT_EQ('B', *target);
+}
+
+TEST(BufferListIterator, get_off) {
+  // The offset given at construction is reported back.
+  bufferlist bl;
+  bl.append("ABC", 3);
+  bufferlist::iterator it(&bl, 1);
+  EXPECT_EQ(1u, it.get_off());
+}
+
+TEST(BufferListIterator, get_remaining) {
+  // Three bytes, positioned at 1: two bytes remain.
+  bufferlist bl;
+  bl.append("ABC", 3);
+  bufferlist::iterator it(&bl, 1);
+  EXPECT_EQ(2u, it.get_remaining());
+}
+
+TEST(BufferListIterator, end) {
+  bufferlist bl;
+  // On an empty list the iterator is already at the end.
+  {
+    bufferlist::iterator it(&bl);
+    EXPECT_TRUE(it.end());
+  }
+  bl.append("ABC", 3);
+  // With content, a fresh iterator is not at the end.
+  {
+    bufferlist::iterator it(&bl);
+    EXPECT_FALSE(it.end());
+  }
+}
+
+TEST(BufferListIterator, advance) {
+  // Two separate ptrs ("ABC" then "DEF") so that advancing crosses a ptr
+  // boundary.
+  bufferlist bl;
+  const std::string head("ABC");
+  bl.append(bufferptr(head.c_str(), head.size()));
+  const std::string tail("DEF");
+  bl.append(bufferptr(tail.c_str(), tail.size()));
+
+  // Moving past the end throws.
+  {
+    bufferlist::iterator it(&bl);
+    EXPECT_THROW(it.advance(200), buffer::end_of_buffer);
+  }
+  // Moving before the beginning throws.
+  {
+    bufferlist::iterator it(&bl);
+    EXPECT_THROW(it.advance(-1), buffer::end_of_buffer);
+  }
+  // Forward and backward moves, including across the ptr boundary.
+  {
+    bufferlist::iterator it(&bl);
+    EXPECT_EQ('A', *it);
+    it.advance(1);
+    EXPECT_EQ('B', *it);
+    it.advance(3);
+    EXPECT_EQ('E', *it);
+    it.advance(-3);
+    EXPECT_EQ('B', *it);
+    it.advance(-1);
+    EXPECT_EQ('A', *it);
+  }
+}
+
+TEST(BufferListIterator, seek) {
+  // seek() positions the iterator at an absolute offset.
+  bufferlist bl;
+  bl.append("ABC", 3);
+  bufferlist::iterator it(&bl, 1);
+  EXPECT_EQ('B', *it);
+  it.seek(2);
+  EXPECT_EQ('C', *it);
+}
+
+TEST(BufferListIterator, operator_star) {
+  bufferlist bl;
+  // Dereferencing on an empty list throws.
+  {
+    bufferlist::iterator it(&bl);
+    EXPECT_THROW(*it, buffer::end_of_buffer);
+  }
+  bl.append("ABC", 3);
+  // After a failed advance past the end, dereferencing throws as well.
+  {
+    bufferlist::iterator it(&bl);
+    EXPECT_EQ('A', *it);
+    EXPECT_THROW(it.advance(200), buffer::end_of_buffer);
+    EXPECT_THROW(*it, buffer::end_of_buffer);
+  }
+}
+
+TEST(BufferListIterator, operator_plus_plus) {
+  bufferlist bl;
+  // Incrementing on an empty list throws.
+  {
+    bufferlist::iterator it(&bl);
+    EXPECT_THROW(++it, buffer::end_of_buffer);
+  }
+  bl.append("ABC", 3);
+  // Incrementing moves to the next byte.
+  {
+    bufferlist::iterator it(&bl);
+    ++it;
+    EXPECT_EQ('B', *it);
+  }
+}
+
+TEST(BufferListIterator, get_current_ptr) {
+  bufferlist bl;
+  // NOTE(review): this case asserts on ++it, not on get_current_ptr();
+  // confirm whether get_current_ptr() on an empty list was meant here.
+  {
+    bufferlist::iterator it(&bl);
+    EXPECT_THROW(++it, buffer::end_of_buffer);
+  }
+  bl.append("ABC", 3);
+  // At offset 1 of "ABC" the current ptr starts at 'B' and spans 2 bytes.
+  {
+    bufferlist::iterator it(&bl, 1);
+    const buffer::ptr current = it.get_current_ptr();
+    EXPECT_EQ('B', current[0]);
+    EXPECT_EQ(1u, current.offset());
+    EXPECT_EQ(2u, current.length());
+  }
+}
+
+TEST(BufferListIterator, copy) {
+  // Exercises every iterator copy overload against a list holding "ABC".
+  bufferlist bl;
+  const char *expected = "ABC";
+  bl.append(expected, 3);
+  //
+  // void copy(unsigned len, char *dest);
+  //
+  {
+    char* copy = (char*)malloc(3);
+    ::memset(copy, 'X', 3);
+    bufferlist::iterator i(&bl);
+    //
+    // demonstrates that it seeks back to offset if p == ls->end()
+    //
+    EXPECT_THROW(i.advance(200), buffer::end_of_buffer);
+    i.copy(2, copy);
+    EXPECT_EQ(0, ::memcmp(copy, expected, 2));
+    // only two bytes were requested: the third keeps its filler value
+    EXPECT_EQ('X', copy[2]);
+    i.seek(0);
+    i.copy(3, copy);
+    EXPECT_EQ(0, ::memcmp(copy, expected, 3));
+    // the scratch buffer belongs to the test and must be released here
+    free(copy);
+  }
+  //
+  // void buffer::list::iterator::copy(unsigned len, ptr &dest)
+  //
+  {
+    bufferptr ptr;
+    bufferlist::iterator i(&bl);
+    i.copy(2, ptr);
+    EXPECT_EQ((unsigned)2, ptr.length());
+    EXPECT_EQ('A', ptr[0]);
+    EXPECT_EQ('B', ptr[1]);
+  }
+  //
+  // void buffer::list::iterator::copy(unsigned len, list &dest)
+  //
+  {
+    bufferlist copy;
+    bufferlist::iterator i(&bl);
+    //
+    // demonstrates that it seeks back to offset if p == ls->end()
+    //
+    EXPECT_THROW(i.advance(200), buffer::end_of_buffer);
+    i.copy(2, copy);
+    EXPECT_EQ(0, ::memcmp(copy.c_str(), expected, 2));
+    i.seek(0);
+    // the second copy is appended after the first: "AB" + "ABC"
+    i.copy(3, copy);
+    EXPECT_EQ('A', copy[0]);
+    EXPECT_EQ('B', copy[1]);
+    EXPECT_EQ('A', copy[2]);
+    EXPECT_EQ('B', copy[3]);
+    EXPECT_EQ('C', copy[4]);
+    EXPECT_EQ((unsigned)(2 + 3), copy.length());
+  }
+  //
+  // void buffer::list::iterator::copy_all(list &dest)
+  //
+  {
+    bufferlist copy;
+    bufferlist::iterator i(&bl);
+    //
+    // demonstrates that it seeks back to offset if p == ls->end()
+    //
+    EXPECT_THROW(i.advance(200), buffer::end_of_buffer);
+    i.copy_all(copy);
+    EXPECT_EQ('A', copy[0]);
+    EXPECT_EQ('B', copy[1]);
+    EXPECT_EQ('C', copy[2]);
+    EXPECT_EQ((unsigned)3, copy.length());
+  }
+  //
+  // void copy(unsigned len, std::string &dest)
+  //
+  {
+    std::string copy;
+    bufferlist::iterator i(&bl);
+    //
+    // demonstrates that it seeks back to offset if p == ls->end()
+    //
+    EXPECT_THROW(i.advance(200), buffer::end_of_buffer);
+    i.copy(2, copy);
+    EXPECT_EQ(0, ::memcmp(copy.c_str(), expected, 2));
+    i.seek(0);
+    // the second copy is appended after the first: "AB" + "ABC"
+    i.copy(3, copy);
+    EXPECT_EQ('A', copy[0]);
+    EXPECT_EQ('B', copy[1]);
+    EXPECT_EQ('A', copy[2]);
+    EXPECT_EQ('B', copy[3]);
+    EXPECT_EQ('C', copy[4]);
+    EXPECT_EQ((unsigned)(2 + 3), copy.length());
+  }
+}
+
+TEST(BufferListIterator, copy_in) {
+  bufferlist bl;
+  const char *filler = "XXX";
+  bl.append(filler, 3);
+
+  // void buffer::list::iterator::copy_in(unsigned len, const char *src)
+  {
+    bufferlist::iterator it(&bl);
+    // demonstrates that it seeks back to offset if p == ls->end()
+    EXPECT_THROW(it.advance(200), buffer::end_of_buffer);
+    const char *wanted = "ABC";
+    it.copy_in(3, wanted);
+    EXPECT_EQ(0, ::memcmp(bl.c_str(), wanted, 3));
+    EXPECT_EQ('A', bl[0]);
+    EXPECT_EQ('B', bl[1]);
+    EXPECT_EQ('C', bl[2]);
+    EXPECT_EQ(3u, bl.length());
+  }
+
+  // void buffer::list::iterator::copy_in(unsigned len, const list& otherl)
+  {
+    bufferlist::iterator it(&bl);
+    // demonstrates that it seeks back to offset if p == ls->end()
+    EXPECT_THROW(it.advance(200), buffer::end_of_buffer);
+    bufferlist wanted;
+    wanted.append("ABC", 3);
+    it.copy_in(3, wanted);
+    EXPECT_EQ(0, ::memcmp(bl.c_str(), wanted.c_str(), 3));
+    EXPECT_EQ('A', bl[0]);
+    EXPECT_EQ('B', bl[1]);
+    EXPECT_EQ('C', bl[2]);
+    EXPECT_EQ(3u, bl.length());
+  }
+}
+
+TEST(BufferList, constructors) {
+  // list(): empty.
+  {
+    bufferlist bl;
+    ASSERT_EQ(0u, bl.length());
+  }
+
+  // list(unsigned prealloc): still reports length 0 until data is appended.
+  {
+    bufferlist bl(1);
+    ASSERT_EQ(0u, bl.length());
+    bl.append('A');
+    ASSERT_EQ('A', bl[0]);
+  }
+
+  // list(const list& other): the copy exposes the same content.
+  {
+    bufferlist source(1);
+    source.append('A');
+    ASSERT_EQ('A', source[0]);
+    bufferlist duplicate(source);
+    ASSERT_EQ('A', duplicate[0]);
+  }
+}
+
+TEST(BufferList, operator_equal) {
+  bufferlist source;
+  source.append("ABC", 3);
+  // Sanity check on the source: the byte at offset 1 is 'B'.
+  {
+    std::string out;
+    source.copy(1, 1, out);
+    ASSERT_EQ('B', out[0]);
+  }
+  // The assigned list yields the same byte.
+  bufferlist assigned;
+  assigned = source;
+  {
+    std::string out;
+    assigned.copy(1, 1, out);
+    ASSERT_EQ('B', out[0]);
+  }
+}
+
+TEST(BufferList, buffers) {
+  // No ptr at first; appending one character creates exactly one.
+  bufferlist bl;
+  ASSERT_EQ(0u, bl.buffers().size());
+  bl.append('A');
+  ASSERT_EQ(1u, bl.buffers().size());
+}
+
+TEST(BufferList, swap) {
+  bufferlist first;
+  first.append('A');
+
+  bufferlist second;
+  second.append('B');
+
+  first.swap(second);
+
+  // Each list now holds what the other one held.
+  std::string from_first;
+  first.copy(0, 1, from_first);
+  ASSERT_EQ('B', from_first[0]);
+
+  std::string from_second;
+  second.copy(0, 1, from_second);
+  ASSERT_EQ('A', from_second[0]);
+}
+
+TEST(BufferList, length) {
+  // Length goes from 0 to 1 when one character is appended.
+  bufferlist bl;
+  ASSERT_EQ(0u, bl.length());
+  bl.append('A');
+  ASSERT_EQ(1u, bl.length());
+}
+
+TEST(BufferList, contents_equal) {
+  // Same bytes "ABB" built from different appends:
+  //   split_early: "A"  + "BB"
+  //   split_late:  "AB" + "B"
+  bufferlist split_early;
+  split_early.append("A");
+  split_early.append("BB");
+  bufferlist split_late;
+  ASSERT_FALSE(split_early.contents_equal(split_late)); // different length
+  split_late.append("AB");
+  split_late.append("B");
+  ASSERT_TRUE(split_early.contents_equal(split_late)); // same length same content
+
+  // "ABC": same length as "ABB" but different bytes.
+  bufferlist other;
+  other.append("ABC");
+  ASSERT_FALSE(split_early.contents_equal(other)); // same length different content
+}
+
+TEST(BufferList, is_page_aligned) {
+  // An empty list counts as aligned.
+  {
+    bufferlist bl;
+    EXPECT_TRUE(bl.is_page_aligned());
+  }
+  // One byte at offset 1: expected unaligned before and after
+  // rebuild_page_aligned().
+  {
+    bufferlist bl;
+    bufferptr bp(2);
+    bp.set_offset(1);
+    bp.set_length(1);
+    bl.append(bp);
+    EXPECT_FALSE(bl.is_page_aligned());
+    bl.rebuild_page_aligned();
+    EXPECT_FALSE(bl.is_page_aligned());
+  }
+  // A full page at offset 1: expected aligned after rebuild_page_aligned().
+  {
+    bufferlist bl;
+    bufferptr bp(CEPH_PAGE_SIZE + 1);
+    bp.set_offset(1);
+    bp.set_length(CEPH_PAGE_SIZE);
+    bl.append(bp);
+    EXPECT_FALSE(bl.is_page_aligned());
+    bl.rebuild_page_aligned();
+    EXPECT_TRUE(bl.is_page_aligned());
+  }
+}
+
+TEST(BufferList, is_n_page_sized) {
+  // Empty list.
+  {
+    bufferlist bl;
+    EXPECT_TRUE(bl.is_n_page_sized());
+  }
+  // One byte.
+  {
+    bufferlist bl;
+    bl.append_zero(1);
+    EXPECT_FALSE(bl.is_n_page_sized());
+  }
+  // Exactly one page.
+  {
+    bufferlist bl;
+    bl.append_zero(CEPH_PAGE_SIZE);
+    EXPECT_TRUE(bl.is_n_page_sized());
+  }
+}
+
+TEST(BufferList, is_zero) {
+  // Empty list.
+  {
+    bufferlist bl;
+    EXPECT_TRUE(bl.is_zero());
+  }
+  // A non-NUL byte.
+  {
+    bufferlist bl;
+    bl.append('A');
+    EXPECT_FALSE(bl.is_zero());
+  }
+  // A single zero byte.
+  {
+    bufferlist bl;
+    bl.append_zero(1);
+    EXPECT_TRUE(bl.is_zero());
+  }
+}
+
+TEST(BufferList, clear) {
+  // After clear() neither bytes nor ptrs remain.
+  bufferlist bl;
+  const unsigned size = 17;
+  bl.append_zero(size);
+  bl.clear();
+  EXPECT_EQ(0u, bl.length());
+  EXPECT_EQ(0u, bl.buffers().size());
+}
+
+TEST(BufferList, push_front) {
+  // void push_front(ptr& bp): a ptr without raw leaves the list empty.
+  {
+    bufferlist bl;
+    bufferptr bp;
+    bl.push_front(bp);
+    EXPECT_EQ(0u, bl.length());
+    EXPECT_EQ(0u, bl.buffers().size());
+  }
+  const unsigned size = 17;
+  // void push_front(ptr& bp): the pushed ptr becomes the first buffer.
+  {
+    bufferlist bl;
+    bl.append('A');
+    bufferptr bp(size);
+    bp.c_str()[0] = 'B';
+    bl.push_front(bp);
+    EXPECT_EQ(1u + size, bl.length());
+    EXPECT_EQ(2u, bl.buffers().size());
+    EXPECT_EQ('B', bl.buffers().front()[0]);
+    EXPECT_EQ(bp.get_raw(), bl.buffers().front().get_raw());
+  }
+  // void push_front(raw *r): same expectations when handing over the raw.
+  {
+    bufferlist bl;
+    bl.append('A');
+    bufferptr bp(size);
+    bp.c_str()[0] = 'B';
+    bl.push_front(bp.get_raw());
+    EXPECT_EQ(1u + size, bl.length());
+    EXPECT_EQ(2u, bl.buffers().size());
+    EXPECT_EQ('B', bl.buffers().front()[0]);
+    EXPECT_EQ(bp.get_raw(), bl.buffers().front().get_raw());
+  }
+}
+
+TEST(BufferList, push_back) {
+  // void push_back(ptr& bp): a ptr without raw leaves the list empty.
+  {
+    bufferlist bl;
+    bufferptr bp;
+    bl.push_back(bp);
+    EXPECT_EQ(0u, bl.length());
+    EXPECT_EQ(0u, bl.buffers().size());
+  }
+  const unsigned size = 17;
+  // void push_back(ptr& bp): the pushed ptr becomes the last buffer.
+  {
+    bufferlist bl;
+    bl.append('A');
+    bufferptr bp(size);
+    bp.c_str()[0] = 'B';
+    bl.push_back(bp);
+    EXPECT_EQ(1u + size, bl.length());
+    EXPECT_EQ(2u, bl.buffers().size());
+    EXPECT_EQ('B', bl.buffers().back()[0]);
+    EXPECT_EQ(bp.get_raw(), bl.buffers().back().get_raw());
+  }
+  // void push_back(raw *r): same expectations when handing over the raw.
+  {
+    bufferlist bl;
+    bl.append('A');
+    bufferptr bp(size);
+    bp.c_str()[0] = 'B';
+    bl.push_back(bp.get_raw());
+    EXPECT_EQ(1u + size, bl.length());
+    EXPECT_EQ(2u, bl.buffers().size());
+    EXPECT_EQ('B', bl.buffers().back()[0]);
+    EXPECT_EQ(bp.get_raw(), bl.buffers().back().get_raw());
+  }
+}
+
+TEST(BufferList, is_contiguous) {
+  // Contiguous with zero or one buffer, not with two.
+  bufferlist bl;
+  EXPECT_TRUE(bl.is_contiguous());
+  EXPECT_EQ(0u, bl.buffers().size());
+
+  bl.append('A');
+  EXPECT_TRUE(bl.is_contiguous());
+  EXPECT_EQ(1u, bl.buffers().size());
+
+  bufferptr extra(1);
+  bl.push_back(extra);
+  EXPECT_FALSE(bl.is_contiguous());
+  EXPECT_EQ(2u, bl.buffers().size());
+}
+
+TEST(BufferList, rebuild) {
+  // One byte at offset 1: expected unaligned before and after rebuild().
+  {
+    bufferlist bl;
+    bufferptr bp(2);
+    bp.set_offset(1);
+    bp.set_length(1);
+    bl.append(bp);
+    EXPECT_FALSE(bl.is_page_aligned());
+    bl.rebuild();
+    EXPECT_FALSE(bl.is_page_aligned());
+  }
+  // Two page-sized appends are expected to collapse into a single, still
+  // page-aligned buffer.
+  {
+    bufferlist bl;
+    const std::string page(CEPH_PAGE_SIZE, 'X');
+    bl.append(page.c_str(), page.size());
+    bl.append(page.c_str(), page.size());
+    EXPECT_EQ(2u, bl.buffers().size());
+    EXPECT_TRUE(bl.is_page_aligned());
+    bl.rebuild();
+    EXPECT_TRUE(bl.is_page_aligned());
+    EXPECT_EQ(1u, bl.buffers().size());
+  }
+}
+
+TEST(BufferList, rebuild_page_aligned) {
+  // A single page-long ptr at offset 1 becomes aligned and stays one buffer.
+  {
+    bufferlist bl;
+    {
+      bufferptr bp(CEPH_PAGE_SIZE + 1);
+      bp.set_offset(1);
+      bp.set_length(CEPH_PAGE_SIZE);
+      bl.append(bp);
+    }
+    EXPECT_EQ(1u, bl.buffers().size());
+    EXPECT_FALSE(bl.is_page_aligned());
+    bl.rebuild_page_aligned();
+    EXPECT_TRUE(bl.is_page_aligned());
+    EXPECT_EQ(1u, bl.buffers().size());
+  }
+  // Six ptrs, some created page-aligned and some not; the test expects
+  // four buffers after the rebuild.
+  {
+    bufferlist bl;
+    // 1: created page-aligned, one page long
+    {
+      bufferptr bp(buffer::create_page_aligned(CEPH_PAGE_SIZE));
+      bl.append(bp);
+    }
+    // 2: one page plus one byte
+    {
+      bufferptr bp(CEPH_PAGE_SIZE + 1);
+      bl.append(bp);
+    }
+    // 3: a single byte at offset 1
+    {
+      bufferptr bp(2);
+      bp.set_offset(1);
+      bp.set_length(1);
+      bl.append(bp);
+    }
+    // 4: one page minus two bytes
+    {
+      bufferptr bp(CEPH_PAGE_SIZE - 2);
+      bl.append(bp);
+    }
+    // 5: created page-aligned, one page long
+    {
+      bufferptr bp(buffer::create_page_aligned(CEPH_PAGE_SIZE));
+      bl.append(bp);
+    }
+    // 6: one page long at offset 1
+    {
+      bufferptr bp(CEPH_PAGE_SIZE + 1);
+      bp.set_offset(1);
+      bp.set_length(CEPH_PAGE_SIZE);
+      bl.append(bp);
+    }
+    EXPECT_EQ(6u, bl.buffers().size());
+    EXPECT_TRUE((bl.length() & ~CEPH_PAGE_MASK) == 0);
+    EXPECT_FALSE(bl.is_page_aligned());
+    bl.rebuild_page_aligned();
+    EXPECT_TRUE(bl.is_page_aligned());
+    EXPECT_EQ(4u, bl.buffers().size());
+  }
+}
+
+TEST(BufferList, claim) {
+  // claim() replaces the destination's content and empties the source.
+  bufferlist src;
+  {
+    bufferptr bp(2);
+    src.append(bp);
+  }
+  bufferlist dst;
+  {
+    bufferptr bp(4);
+    dst.append(bp);
+  }
+  EXPECT_EQ(4u, dst.length());
+  EXPECT_EQ(1u, dst.buffers().size());
+  dst.claim(src);
+  EXPECT_EQ(2u, dst.length());
+  EXPECT_EQ(1u, dst.buffers().size());
+  EXPECT_EQ(0u, src.buffers().size());
+  EXPECT_EQ(0u, src.length());
+}
+
+TEST(BufferList, claim_append) {
+  // claim_append() moves the source's buffers to the back of the
+  // destination and empties the source.
+  bufferlist src;
+  {
+    bufferptr bp(2);
+    src.append(bp);
+  }
+  bufferlist dst;
+  {
+    bufferptr bp(4);
+    dst.append(bp);
+  }
+  EXPECT_EQ(4u, dst.length());
+  EXPECT_EQ(1u, dst.buffers().size());
+  dst.claim_append(src);
+  EXPECT_EQ(4u + 2u, dst.length());
+  EXPECT_EQ(4u, dst.buffers().front().length());
+  EXPECT_EQ(2u, dst.buffers().back().length());
+  EXPECT_EQ(2u, dst.buffers().size());
+  EXPECT_EQ(0u, src.buffers().size());
+  EXPECT_EQ(0u, src.length());
+}
+
+TEST(BufferList, claim_prepend) {
+  // claim_prepend() moves the source's buffers to the front of the
+  // destination and empties the source.
+  bufferlist src;
+  {
+    bufferptr bp(2);
+    src.append(bp);
+  }
+  bufferlist dst;
+  {
+    bufferptr bp(4);
+    dst.append(bp);
+  }
+  EXPECT_EQ(4u, dst.length());
+  EXPECT_EQ(1u, dst.buffers().size());
+  dst.claim_prepend(src);
+  EXPECT_EQ(2u + 4u, dst.length());
+  EXPECT_EQ(2u, dst.buffers().front().length());
+  EXPECT_EQ(4u, dst.buffers().back().length());
+  EXPECT_EQ(2u, dst.buffers().size());
+  EXPECT_EQ(0u, src.buffers().size());
+  EXPECT_EQ(0u, src.length());
+}
+
+TEST(BufferList, begin) {
+  // begin() points at the first byte.
+  bufferlist bl;
+  bl.append("ABC");
+  bufferlist::iterator it = bl.begin();
+  EXPECT_EQ('A', *it);
+}
+
+TEST(BufferList, end) {
+  // Stepping back once from end() reaches the last byte.
+  bufferlist bl;
+  bl.append("ABC");
+  bufferlist::iterator it = bl.end();
+  it.advance(-1);
+  EXPECT_EQ('C', *it);
+}
+
+TEST(BufferList, copy) {
+  // Each overload: an out-of-range request on an empty list throws, then
+  // bytes 1..2 of "ABC" are copied out.
+
+  // void copy(unsigned off, unsigned len, char *dest) const;
+  {
+    bufferlist bl;
+    EXPECT_THROW(bl.copy(100u, 100u, (char*)0), buffer::end_of_buffer);
+    const char *content = "ABC";
+    bl.append(content);
+    char *out = new char[2];
+    bl.copy(1, 2, out);
+    EXPECT_EQ(0, ::memcmp(content + 1, out, 2));
+    delete [] out;
+  }
+
+  // void copy(unsigned off, unsigned len, list &dest) const;
+  {
+    bufferlist bl;
+    bufferlist out;
+    EXPECT_THROW(bl.copy(100u, 100u, out), buffer::end_of_buffer);
+    const char *content = "ABC";
+    bl.append(content);
+    bl.copy(1, 2, out);
+    EXPECT_EQ(0, ::memcmp(content + 1, out.c_str(), 2));
+  }
+
+  // void copy(unsigned off, unsigned len, std::string &dest) const;
+  {
+    bufferlist bl;
+    std::string out;
+    EXPECT_THROW(bl.copy(100u, 100u, out), buffer::end_of_buffer);
+    const char *content = "ABC";
+    bl.append(content);
+    bl.copy(1, 2, out);
+    EXPECT_EQ(0, ::memcmp(content + 1, out.c_str(), 2));
+  }
+}
+
+TEST(BufferList, copy_in) {
+  // Each overload: an out-of-range request throws, then two bytes are
+  // written at offset 1 of "XXX".
+
+  // void copy_in(unsigned off, unsigned len, const char *src);
+  {
+    bufferlist target;
+    target.append("XXX");
+    EXPECT_THROW(target.copy_in(100u, 100u, (char*)0), buffer::end_of_buffer);
+    target.copy_in(1, 2, "AB");
+    EXPECT_EQ(0, ::memcmp("XAB", target.c_str(), 3));
+  }
+
+  // void copy_in(unsigned off, unsigned len, const list& src);
+  {
+    bufferlist target;
+    target.append("XXX");
+    bufferlist source;
+    source.append("ABC");
+    EXPECT_THROW(target.copy_in(100u, 100u, source), buffer::end_of_buffer);
+    target.copy_in(1, 2, source);
+    EXPECT_EQ(0, ::memcmp("XAB", target.c_str(), 3));
+  }
+}
+
+TEST(BufferList, append) {
+  // void append(char c);
+  {
+    bufferlist bl;
+    EXPECT_EQ(0u, bl.buffers().size());
+    bl.append('A');
+    EXPECT_EQ(1u, bl.buffers().size());
+    EXPECT_TRUE(bl.is_page_aligned());
+  }
+
+  // void append(const char *data, unsigned len);
+  // Two pages into a list preallocated with one page: expects two
+  // page-long buffers.
+  {
+    bufferlist bl(CEPH_PAGE_SIZE);
+    std::string two_pages(CEPH_PAGE_SIZE * 2, 'X');
+    bl.append(two_pages.c_str(), two_pages.size());
+    EXPECT_EQ(2u, bl.buffers().size());
+    EXPECT_EQ(CEPH_PAGE_SIZE, bl.buffers().front().length());
+    EXPECT_EQ(CEPH_PAGE_SIZE, bl.buffers().back().length());
+  }
+
+  // void append(const std::string& s);
+  {
+    bufferlist bl(CEPH_PAGE_SIZE);
+    std::string two_pages(CEPH_PAGE_SIZE * 2, 'X');
+    bl.append(two_pages);
+    EXPECT_EQ(2u, bl.buffers().size());
+    EXPECT_EQ(CEPH_PAGE_SIZE, bl.buffers().front().length());
+    EXPECT_EQ(CEPH_PAGE_SIZE, bl.buffers().back().length());
+  }
+
+  // void append(const ptr& bp);
+  // A ptr without raw adds nothing; a 3-byte ptr adds one buffer.
+  {
+    bufferlist bl;
+    EXPECT_EQ(0u, bl.buffers().size());
+    EXPECT_EQ(0u, bl.length());
+    {
+      bufferptr bp;
+      bl.append(bp);
+      EXPECT_EQ(0u, bl.buffers().size());
+      EXPECT_EQ(0u, bl.length());
+    }
+    {
+      bufferptr bp(3);
+      bl.append(bp);
+      EXPECT_EQ(1u, bl.buffers().size());
+      EXPECT_EQ(3u, bl.length());
+    }
+  }
+
+  // void append(const ptr& bp, unsigned off, unsigned len);
+  // Appending the byte that directly follows the list's last buffer in
+  // the same raw is expected to grow that buffer, not add a new one.
+  {
+    bufferlist bl;
+    bl.append('A');
+    bufferptr tail(bl.buffers().back());
+    bufferptr extended(tail);
+    EXPECT_EQ(1u, bl.buffers().size());
+    EXPECT_EQ(1u, bl.length());
+    EXPECT_THROW(bl.append(extended, 100u, 100u), FailedAssertion);
+    EXPECT_LT(0u, extended.unused_tail_length());
+    extended.append('B');
+    bl.append(extended, tail.end(), 1);
+    EXPECT_EQ(1u, bl.buffers().size());
+    EXPECT_EQ(2u, bl.length());
+    EXPECT_EQ('B', bl[1]);
+  }
+  // A sub-range of a ptr appended to an empty list adds one buffer.
+  {
+    bufferlist bl;
+    EXPECT_EQ(0u, bl.buffers().size());
+    EXPECT_EQ(0u, bl.length());
+    bufferptr bp(2);
+    bp.set_length(0);
+    bp.append("AB", 2);
+    bl.append(bp, 1, 1);
+    EXPECT_EQ(1u, bl.buffers().size());
+    EXPECT_EQ(1u, bl.length());
+  }
+
+  // void append(const list& bl);
+  {
+    bufferlist bl;
+    bl.append('A');
+    bufferlist extra;
+    extra.append('B');
+    bl.append(extra);
+    EXPECT_EQ(2u, bl.buffers().size());
+    EXPECT_EQ('B', bl[1]);
+  }
+
+  // void append(std::istream& in);
+  // The expected content ends with a newline that the input lacks.
+  {
+    bufferlist bl;
+    std::string wanted("ABC\n\nDEF\n");
+    std::istringstream input("ABC\n\nDEF");
+    bl.append(input);
+    EXPECT_EQ(0, ::memcmp(wanted.c_str(), bl.c_str(), wanted.size()));
+    EXPECT_EQ(wanted.size(), bl.length());
+  }
+}
+
+// append_zero(1) on a one byte list is expected to add a second buffer
+// holding a single NUL byte.
+TEST(BufferList, append_zero) {
+  bufferlist buf;
+  buf.append('A');
+  EXPECT_EQ((unsigned)1, buf.buffers().size());
+  EXPECT_EQ((unsigned)1, buf.length());
+
+  buf.append_zero(1);
+  EXPECT_EQ((unsigned)2, buf.buffers().size());
+  EXPECT_EQ((unsigned)2, buf.length());
+  EXPECT_EQ('\0', buf[1]);
+}
+
+// operator[] throws on an out of range index and otherwise reads across
+// buffer boundaries.
+TEST(BufferList, operator_brackets) {
+  bufferlist first;
+  EXPECT_THROW(first[1], buffer::end_of_buffer);
+  first.append('A');
+
+  bufferlist second;
+  second.append('B');
+
+  first.append(second);
+  EXPECT_EQ((unsigned)2, first.buffers().size());
+  EXPECT_EQ('B', first[1]);
+}
+
+// c_str() is NULL for an empty list and exposes the content of a two
+// buffer list as contiguous bytes.
+TEST(BufferList, c_str) {
+  bufferlist first;
+  EXPECT_EQ((const char*)NULL, first.c_str());
+  first.append('A');
+
+  bufferlist second;
+  second.append('B');
+
+  first.append(second);
+  EXPECT_EQ((unsigned)2, first.buffers().size());
+  EXPECT_EQ(0, ::memcmp("AB", first.c_str(), 2));
+}
+
+// substr_of() replaces the content of the target with a slice of the
+// source; here the slice spans two of the four source buffers.
+TEST(BufferList, substr_of) {
+  bufferlist bl;
+  EXPECT_THROW(bl.substr_of(bl, 1, 1), buffer::end_of_buffer);
+
+  const char *chunks[] = { "ABC", "DEF", "GHI", "JKL" };
+  for (const char **chunk = chunks; chunk != chunks + 4; ++chunk) {
+    bufferptr ptr(*chunk, strlen(*chunk));
+    bl.push_back(ptr);
+  }
+  EXPECT_EQ((unsigned)4, bl.buffers().size());
+
+  // the previous content of the target must be discarded
+  bufferlist slice;
+  slice.append("TO BE CLEARED");
+  slice.substr_of(bl, 4, 4);
+  EXPECT_EQ((unsigned)2, slice.buffers().size());
+  EXPECT_EQ((unsigned)4, slice.length());
+  EXPECT_EQ(0, ::memcmp("EFGH", slice.c_str(), 4));
+}
+
+// splice() removes a range from the list, optionally appending the
+// removed bytes to a claiming list.
+TEST(BufferList, splice) {
+  bufferlist bl;
+  EXPECT_THROW(bl.splice(1, 1), buffer::end_of_buffer);
+
+  const char *chunks[] = { "ABC", "DEF", "GHI", "JKL" };
+  for (const char **chunk = chunks; chunk != chunks + 4; ++chunk) {
+    bufferptr ptr(*chunk, strlen(*chunk));
+    bl.push_back(ptr);
+  }
+  EXPECT_EQ((unsigned)4, bl.buffers().size());
+  EXPECT_THROW(bl.splice(0, 0), FailedAssertion);
+
+  // removed bytes are appended after the existing content of the claimer
+  bufferlist claimed;
+  claimed.append('X');
+  bl.splice(4, 4, &claimed);
+  EXPECT_EQ((unsigned)3, claimed.buffers().size());
+  EXPECT_EQ((unsigned)5, claimed.length());
+  EXPECT_EQ(0, ::memcmp("XEFGH", claimed.c_str(), claimed.length()));
+  EXPECT_EQ((unsigned)8, bl.length());
+  {
+    // inspect a copy so that c_str() is not called on bl itself
+    bufferlist snapshot(bl);
+    EXPECT_EQ(0, ::memcmp("ABCDIJKL", snapshot.c_str(), snapshot.length()));
+  }
+
+  bl.splice(4, 4);
+  EXPECT_EQ((unsigned)4, bl.length());
+  EXPECT_EQ(0, ::memcmp("ABCD", bl.c_str(), bl.length()));
+}
+
+// write(off, len, stream) copies the selected range to the stream.
+TEST(BufferList, write) {
+  bufferlist bl;
+  bl.append("ABC");
+
+  std::ostringstream out;
+  bl.write(1, 2, out);
+  EXPECT_EQ("BC", out.str());
+}
+
+// encode_base64() stores the base64 form of the list into its argument.
+TEST(BufferList, encode_base64) {
+  bufferlist plain;
+  plain.append("ABCD");
+
+  bufferlist encoded;
+  plain.encode_base64(encoded);
+
+  const std::string expected("QUJDRA==");
+  EXPECT_EQ(0, ::memcmp(expected.c_str(), encoded.c_str(), expected.size()));
+}
+
+// decode_base64() decodes its argument into the list and throws
+// buffer::malformed_input when the padding is missing.
+TEST(BufferList, decode_base64) {
+  bufferlist encoded;
+  encoded.append("QUJDRA==");
+
+  bufferlist decoded;
+  decoded.decode_base64(encoded);
+
+  const std::string expected("ABCD");
+  EXPECT_EQ(0, ::memcmp(expected.c_str(), decoded.c_str(), expected.size()));
+
+  bufferlist truncated;
+  truncated.append("QUJDRA");
+  EXPECT_THROW(decoded.decode_base64(truncated), buffer::malformed_input);
+}
+
+// hexdump() prints 16 bytes per line; the embedded NUL byte shows as '.'
+// in the text column of the expected output.
+TEST(BufferList, hexdump) {
+  bufferlist bl;
+  bl.append("013245678901234\0006789012345678901234", 32);
+
+  std::ostringstream out;
+  bl.hexdump(out);
+
+  const char *expected =
+    "0000 : 30 31 33 32 34 35 36 37 38 39 30 31 32 33 34 00 : 013245678901234.\n"
+    "0010 : 36 37 38 39 30 31 32 33 34 35 36 37 38 39 30 31 : 6789012345678901\n";
+  EXPECT_EQ(expected, out.str());
+}
+
+// read_file() reports -ENOENT for a missing file, -EACCES for an
+// unreadable one and otherwise appends the file content to the list.
+TEST(BufferList, read_file) {
+  std::string error;
+  bufferlist bl;
+  ::unlink("testfile");
+  EXPECT_EQ(-ENOENT, bl.read_file("UNLIKELY", &error));
+  ASSERT_EQ(0, ::system("echo ABC > testfile ; chmod 0 testfile"));
+  //
+  // The superuser bypasses file permission checks: as root the read
+  // would succeed and append the file content a first time, which
+  // would also break the length check below. Only expect -EACCES for
+  // unprivileged users.
+  //
+  if (::geteuid() != 0) {
+    EXPECT_EQ(-EACCES, bl.read_file("testfile", &error));
+  }
+  ASSERT_EQ(0, ::system("chmod +r testfile"));
+  EXPECT_EQ(0, bl.read_file("testfile", &error));
+  ::unlink("testfile");
+  EXPECT_EQ((unsigned)4, bl.length());
+  std::string actual(bl.c_str(), bl.length());
+  EXPECT_EQ("ABC\n", actual);
+}
+
+// read_fd() reports -EBADF for an invalid descriptor and otherwise
+// reads len bytes into a buffer of CEPH_PAGE_SIZE bytes.
+TEST(BufferList, read_fd) {
+  unsigned len = 4;
+  ::unlink("testfile");
+  // without the fixture file every later expectation is meaningless
+  ASSERT_EQ(0, ::system("echo ABC > testfile"));
+  int fd = -1;
+  bufferlist bl;
+  EXPECT_EQ(-EBADF, bl.read_fd(fd, len));
+  fd = ::open("testfile", O_RDONLY);
+  ASSERT_NE(-1, fd);
+  // compare as signed values: read_fd() returns negative error codes
+  EXPECT_EQ((ssize_t)len, bl.read_fd(fd, len));
+  EXPECT_EQ(len, bl.length());
+  EXPECT_EQ(CEPH_PAGE_SIZE - len, bl.buffers().front().unused_tail_length());
+  ::close(fd);
+  ::unlink("testfile");
+}
+
+// write_file() reports -ENOENT when the directory does not exist and
+// otherwise creates a regular file with the requested mode.
+TEST(BufferList, write_file) {
+  ::unlink("testfile");
+  int mode = 0600;
+  bufferlist bl;
+  EXPECT_EQ(-ENOENT, bl.write_file("un/like/ly", mode));
+  bl.append("ABC");
+  EXPECT_EQ(0, bl.write_file("testfile", mode));
+  struct stat st;
+  memset(&st, 0, sizeof(st));
+  // a failing stat() would leave st zeroed and make the mode check misleading
+  ASSERT_EQ(0, ::stat("testfile", &st));
+  EXPECT_EQ((unsigned)(mode | S_IFREG), st.st_mode);
+  ::unlink("testfile");
+}
+
+// write_fd() must write every buffer even when the list holds more
+// than IOV_MAX of them.
+TEST(BufferList, write_fd) {
+  ::unlink("testfile");
+  int fd = ::open("testfile", O_WRONLY|O_CREAT|O_TRUNC, 0600);
+  ASSERT_NE(-1, fd);
+  bufferlist bl;
+  for (unsigned i = 0; i < IOV_MAX * 2; i++) {
+    bufferptr ptr("A", 1);
+    bl.push_back(ptr);
+  }
+  EXPECT_EQ(0, bl.write_fd(fd));
+  ::close(fd);
+  struct stat st;
+  memset(&st, 0, sizeof(st));
+  // a failing stat() would leave st zeroed and make the size check misleading
+  ASSERT_EQ(0, ::stat("testfile", &st));
+  EXPECT_EQ(IOV_MAX * 2, st.st_size);
+  ::unlink("testfile");
+}
+
+// crc32c() is seeded with the previous checksum: feeding the same
+// content twice yields two different, known values.
+TEST(BufferList, crc32c) {
+  bufferlist bl;
+  bl.append("A");
+
+  __u32 checksum = bl.crc32c(0);
+  EXPECT_EQ((unsigned)0xB3109EBF, checksum);
+
+  checksum = bl.crc32c(checksum);
+  EXPECT_EQ((unsigned)0x5FA5C0CC, checksum);
+}
+
+// Checks the relational operators on three lists holding "A", "AB"
+// and "AC".
+TEST(BufferList, compare) {
+  bufferlist only_a;
+  only_a.append("A");
+  bufferlist a_b;
+  a_b.append("AB");
+  bufferlist a_c;
+  a_c.append("AC");
+
+  // bool operator>(bufferlist& l, bufferlist& r)
+  ASSERT_FALSE(only_a > a_b);
+  ASSERT_TRUE(a_b > only_a);
+  ASSERT_TRUE(a_c > a_b);
+  ASSERT_FALSE(a_b > a_c);
+  ASSERT_FALSE(a_b > a_b);
+
+  // bool operator>=(bufferlist& l, bufferlist& r)
+  ASSERT_FALSE(only_a >= a_b);
+  ASSERT_TRUE(a_b >= only_a);
+  ASSERT_TRUE(a_c >= a_b);
+  ASSERT_FALSE(a_b >= a_c);
+  ASSERT_TRUE(a_b >= a_b);
+
+  // bool operator<(bufferlist& l, bufferlist& r)
+  ASSERT_TRUE(only_a < a_b);
+  ASSERT_FALSE(a_b < only_a);
+  ASSERT_FALSE(a_c < a_b);
+  ASSERT_TRUE(a_b < a_c);
+  ASSERT_FALSE(a_b < a_b);
+
+  // bool operator<=(bufferlist& l, bufferlist& r)
+  ASSERT_TRUE(only_a <= a_b);
+  ASSERT_FALSE(a_b <= only_a);
+  ASSERT_FALSE(a_c <= a_b);
+  ASSERT_TRUE(a_b <= a_c);
+  ASSERT_TRUE(a_b <= a_b);
+
+  // bool operator==(bufferlist &l, bufferlist &r)
+  ASSERT_FALSE(only_a == a_b);
+  ASSERT_FALSE(a_c == a_b);
+  ASSERT_TRUE(a_b == a_b);
+}
+
+// operator<< on a two buffer list must mention the total length and
+// describe each of the buffers.
+TEST(BufferList, ostream) {
+  bufferlist bl;
+  const char *chunks[] = { "ABC", "DEF" };
+  for (const char **chunk = chunks; chunk != chunks + 2; ++chunk) {
+    bufferptr ptr(*chunk, strlen(*chunk));
+    bl.push_back(ptr);
+  }
+
+  std::ostringstream stream;
+  stream << bl;
+  const std::string dump = stream.str();
+  std::cerr << dump << std::endl;
+
+  // find() returns npos, which is greater than size(), when not found
+  EXPECT_GT(dump.size(), dump.find("list(len=6,"));
+  EXPECT_GT(dump.size(), dump.find("len 3 nref 1),\n"));
+  EXPECT_GT(dump.size(), dump.find("len 3 nref 1)\n"));
+}
+
+// Covers zero() on the whole list and zero(o, l) on ranges within one
+// buffer, across several buffers and exactly on a buffer boundary.
+TEST(BufferList, zero) {
+  // void zero()
+  {
+    bufferlist bl;
+    bl.append('A');
+    EXPECT_EQ('A', bl[0]);
+    bl.zero();
+    EXPECT_EQ('\0', bl[0]);
+  }
+
+  // void zero(unsigned o, unsigned l)
+  const char *chunks[] = { "ABC", "DEF", "GHI", "KLM" };
+  const unsigned chunk_count = 4;
+
+  // range within a single buffer
+  {
+    bufferlist bl;
+    bufferptr ptr(chunks[0], strlen(chunks[0]));
+    bl.push_back(ptr);
+    bl.zero((unsigned)0, (unsigned)1);
+    EXPECT_EQ(0, ::memcmp("\0BC", bl.c_str(), 3));
+  }
+
+  // range spanning three buffers, preceded by an out of range request
+  {
+    bufferlist bl;
+    for (unsigned n = 0; n != chunk_count; ++n) {
+      bufferptr ptr(chunks[n], strlen(chunks[n]));
+      bl.push_back(ptr);
+    }
+    EXPECT_THROW(bl.zero((unsigned)0, (unsigned)2000), FailedAssertion);
+    bl.zero((unsigned)2, (unsigned)5);
+    EXPECT_EQ(0, ::memcmp("AB\0\0\0\0\0HIKLM", bl.c_str(), 9));
+  }
+
+  // range covering exactly the second buffer
+  {
+    bufferlist bl;
+    for (unsigned n = 0; n != chunk_count; ++n) {
+      bufferptr ptr(chunks[n], strlen(chunks[n]));
+      bl.push_back(ptr);
+    }
+    bl.zero((unsigned)3, (unsigned)3);
+    EXPECT_EQ(0, ::memcmp("ABC\0\0\0GHIKLM", bl.c_str(), 9));
+  }
+}
TEST(BufferList, EmptyAppend) {
bufferlist bl;
@@ -71,7 +1823,34 @@ TEST(BufferList, TestCopyAll) {
bufferlist bl2;
i.copy_all(bl2);
ASSERT_EQ(bl2.length(), BIG_SZ);
- unsigned char big2[BIG_SZ];
- bl2.copy(0, BIG_SZ, (char*)big2);
- ASSERT_EQ(memcmp(big.get(), big2, BIG_SZ), 0);
+ std::tr1::shared_ptr <unsigned char> big2(
+ (unsigned char*)malloc(BIG_SZ), free);
+ bl2.copy(0, BIG_SZ, (char*)big2.get());
+ ASSERT_EQ(memcmp(big.get(), big2.get(), BIG_SZ), 0);
}
+
+// bufferhash starts with a digest of 0, accumulates through update()
+// and operator<<, and operator<< returns the hash it was applied to.
+TEST(BufferHash, all) {
+  // update()
+  {
+    bufferlist content;
+    content.append("A");
+
+    bufferhash hash;
+    EXPECT_EQ((unsigned)0, hash.digest());
+    hash.update(content);
+    EXPECT_EQ((unsigned)0xB3109EBF, hash.digest());
+    hash.update(content);
+    EXPECT_EQ((unsigned)0x5FA5C0CC, hash.digest());
+  }
+  // operator<<
+  {
+    bufferlist content;
+    content.append("A");
+
+    bufferhash hash;
+    EXPECT_EQ((unsigned)0, hash.digest());
+    bufferhash *returned = &(hash << content);
+    EXPECT_EQ(returned, &hash);
+    EXPECT_EQ((unsigned)0xB3109EBF, hash.digest());
+  }
+}
+
+// Local Variables:
+// compile-command: "cd .. ; make unittest_bufferlist ; ulimit -s unlimited ; CEPH_BUFFER_TRACK=true valgrind --max-stackframe=20000000 --tool=memcheck ./unittest_bufferlist # --gtest_filter=BufferList.constructors"
+// End:
diff --git a/src/test/common/Throttle.cc b/src/test/common/Throttle.cc
new file mode 100644
index 00000000000..60d7daebdac
--- /dev/null
+++ b/src/test/common/Throttle.cc
@@ -0,0 +1,256 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2013 Cloudwatt <libre.licensing@cloudwatt.com>
+ *
+ * Author: Loic Dachary <loic@dachary.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Library Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Library Public License for more details.
+ *
+ */
+
+#include <stdio.h>
+#include <signal.h>
+#include "common/Mutex.h"
+#include "common/Thread.h"
+#include "common/Throttle.h"
+#include "common/ceph_argparse.h"
+#include "global/global_init.h"
+#include <gtest/gtest.h>
+
+// Fixture providing a helper thread for the Throttle tests.
+class ThrottleTest : public ::testing::Test {
+protected:
+
+  //
+  // Thread that calls get(count) on the throttle, stores the value
+  // returned by get() in `waited` and then calls put(count).
+  //
+  class Thread_get : public Thread {
+  public:
+    Throttle &throttle;
+    int64_t count;
+    bool waited;
+
+    Thread_get(Throttle& target, int64_t amount)
+      : throttle(target), count(amount), waited(false) {}
+
+    virtual void *entry() {
+      const bool get_result = throttle.get(count);
+      waited = get_result;
+      throttle.put(count);
+      return NULL;
+    }
+  };
+
+};
+
+// The constructor rejects a negative maximum and otherwise starts with
+// the requested maximum and nothing taken.
+TEST_F(ThrottleTest, Throttle) {
+  ASSERT_THROW({
+      Throttle invalid(g_ceph_context, "throttle", -1);
+    }, FailedAssertion);
+
+  int64_t limit = 10;
+  Throttle throttle(g_ceph_context, "throttle", limit);
+  ASSERT_EQ(throttle.get_max(), limit);
+  ASSERT_EQ(throttle.get_current(), 0);
+}
+
+// take() rejects a negative count and returns the running total, which
+// may exceed the maximum.
+TEST_F(ThrottleTest, take) {
+  int64_t limit = 10;
+  Throttle throttle(g_ceph_context, "throttle", limit);
+
+  ASSERT_THROW(throttle.take(-1), FailedAssertion);
+  ASSERT_EQ(throttle.take(limit), limit);
+  ASSERT_EQ(throttle.take(limit), limit * 2);
+}
+
+// Exercises Throttle::get() first from a single thread, then against
+// helper threads started while the throttle is full.
+TEST_F(ThrottleTest, get) {
+ int64_t throttle_max = 10;
+ Throttle throttle(g_ceph_context, "throttle", throttle_max);
+ ASSERT_THROW(throttle.get(-1), FailedAssertion);
+ ASSERT_FALSE(throttle.get(5));
+ ASSERT_EQ(throttle.put(5), 0);
+
+ ASSERT_FALSE(throttle.get(throttle_max));
+ ASSERT_FALSE(throttle.get_or_fail(1));
+ // the second argument of get() is a new maximum, here throttle_max + 1
+ ASSERT_FALSE(throttle.get(1, throttle_max + 1));
+ ASSERT_EQ(throttle.put(throttle_max + 1), 0);
+ ASSERT_FALSE(throttle.get(0, throttle_max));
+ ASSERT_FALSE(throttle.get(throttle_max));
+ ASSERT_FALSE(throttle.get_or_fail(1));
+ ASSERT_EQ(throttle.put(throttle_max), 0);
+
+ useconds_t delay = 1;
+
+ bool waited;
+
+ // Fill the throttle, start a thread calling get(7) and release the
+ // throttle after `delay` microseconds. Retry with a doubled delay
+ // until the thread reports waited == true.
+ do {
+ cout << "Trying (1) with delay " << delay << "us\n";
+
+ ASSERT_FALSE(throttle.get(throttle_max));
+ ASSERT_FALSE(throttle.get_or_fail(throttle_max));
+
+ Thread_get t(throttle, 7);
+ t.create();
+ usleep(delay);
+ ASSERT_EQ(throttle.put(throttle_max), 0);
+ t.join();
+
+ if (!(waited = t.waited))
+ delay *= 2;
+ } while(!waited);
+
+ // Same as above with the throttle half full and two threads, asking
+ // for throttle_max and 1: retry until both report waited == true.
+ do {
+ cout << "Trying (2) with delay " << delay << "us\n";
+
+ ASSERT_FALSE(throttle.get(throttle_max / 2));
+ ASSERT_FALSE(throttle.get_or_fail(throttle_max));
+
+ Thread_get t(throttle, throttle_max);
+ t.create();
+ usleep(delay);
+
+ Thread_get u(throttle, 1);
+ u.create();
+ usleep(delay);
+
+ throttle.put(throttle_max / 2);
+
+ t.join();
+ u.join();
+
+ if (!(waited = t.waited && u.waited))
+ delay *= 2;
+ } while(!waited);
+
+}
+
+// get_or_fail() always succeeds on a throttle built without a maximum;
+// with a maximum it fails once the throttle is full.
+TEST_F(ThrottleTest, get_or_fail) {
+  // no maximum given to the constructor
+  {
+    Throttle unbounded(g_ceph_context, "throttle");
+
+    ASSERT_TRUE(unbounded.get_or_fail(5));
+    ASSERT_TRUE(unbounded.get_or_fail(5));
+  }
+
+  // maximum of 10
+  {
+    int64_t limit = 10;
+    Throttle bounded(g_ceph_context, "throttle", limit);
+
+    ASSERT_TRUE(bounded.get_or_fail(limit));
+    ASSERT_EQ(bounded.put(limit), 0);
+
+    // a request larger than the maximum is granted on an empty throttle
+    ASSERT_TRUE(bounded.get_or_fail(limit * 2));
+    ASSERT_FALSE(bounded.get_or_fail(1));
+    ASSERT_FALSE(bounded.get_or_fail(limit * 2));
+    ASSERT_EQ(bounded.put(limit * 2), 0);
+
+    ASSERT_TRUE(bounded.get_or_fail(limit));
+    ASSERT_FALSE(bounded.get_or_fail(1));
+    ASSERT_EQ(bounded.put(limit), 0);
+  }
+}
+
+// Checks that Throttle::wait() called with a larger maximum lets a
+// thread stuck in get() proceed.
+TEST_F(ThrottleTest, wait) {
+ int64_t throttle_max = 10;
+ Throttle throttle(g_ceph_context, "throttle", throttle_max);
+
+ useconds_t delay = 1;
+
+ bool waited;
+
+ // Retry with a doubled delay until the helper thread reports
+ // waited == true.
+ do {
+ cout << "Trying (3) with delay " << delay << "us\n";
+
+ ASSERT_FALSE(throttle.get(throttle_max / 2));
+ ASSERT_FALSE(throttle.get_or_fail(throttle_max));
+
+ Thread_get t(throttle, throttle_max);
+ t.create();
+ usleep(delay);
+
+ //
+ // Throttle::_reset_max(int64_t m) used to contain a test
+ // that blocked the following statement, only if
+ // the argument was greater than throttle_max.
+ // Although a value lower than throttle_max would cover
+ // the same code in _reset_max, the throttle_max * 100
+ // value is left here to demonstrate that the problem
+ // has been solved.
+ //
+ throttle.wait(throttle_max * 100);
+ usleep(delay);
+ // only the throttle_max / 2 taken by this thread is expected to remain
+ ASSERT_EQ(throttle.get_current(), throttle_max / 2);
+
+
+ t.join();
+
+ if (!(waited = t.waited)) {
+ delay *= 2;
+ // undo the changes we made
+ throttle.put(throttle_max / 2);
+ throttle.wait(throttle_max);
+ }
+ } while(!waited);
+}
+
+// Deletes a Throttle while a helper thread is still calling get() on it.
+TEST_F(ThrottleTest, destructor) {
+ Thread_get *t;
+ {
+ int64_t throttle_max = 10;
+ Throttle *throttle = new Throttle(g_ceph_context, "throttle", throttle_max);
+
+ ASSERT_FALSE(throttle->get(5));
+
+ t = new Thread_get(*throttle, 7);
+ t->create();
+ bool blocked;
+ useconds_t delay = 1;
+ // Poll with a doubling delay until get_or_fail(1) fails; a
+ // successful probe is undone with put(1).
+ do {
+ usleep(delay);
+ if (throttle->get_or_fail(1)) {
+ throttle->put(1);
+ blocked = false;
+ } else {
+ blocked = true;
+ }
+ delay *= 2;
+ } while(!blocked);
+ delete throttle;
+ }
+
+ { //
+ // The thread is left hanging, otherwise it will abort().
+ // Deleting the Throttle on which it is waiting creates an
+ // inconsistency that will be detected: the Throttle object that
+ // it references no longer exists.
+ //
+ pthread_t id = t->get_thread_id();
+ // pthread_kill() with signal 0 only checks that the thread exists
+ ASSERT_EQ(pthread_kill(id, 0), 0);
+ delete t;
+ ASSERT_EQ(pthread_kill(id, 0), 0);
+ }
+}
+
+// Initializes the global Ceph context from the command line, then runs
+// every registered test.
+int main(int argc, char **argv) {
+  vector<const char*> ceph_args;
+  argv_to_vec(argc, (const char **)argv, ceph_args);
+
+  global_init(NULL, ceph_args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0);
+  common_init_finish(g_ceph_context);
+
+  ::testing::InitGoogleTest(&argc, argv);
+  const int status = RUN_ALL_TESTS();
+  return status;
+}
+
+// Local Variables:
+// compile-command: "cd ../.. ; make unittest_throttle ; ./unittest_throttle # --gtest_filter=ThrottleTest.destructor --log-to-stderr=true --debug-filestore=20"
+// End:
diff --git a/src/test/crypto.cc b/src/test/crypto.cc
index 80a5495001d..24d5c5a475d 100644
--- a/src/test/crypto.cc
+++ b/src/test/crypto.cc
@@ -43,19 +43,19 @@ TEST(AES, Encrypt) {
};
bufferptr secret(secret_s, sizeof(secret_s));
- char plaintext_s[] = {
+ unsigned char plaintext_s[] = {
0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff,
};
bufferlist plaintext;
- plaintext.append(plaintext_s, sizeof(plaintext_s));
+ plaintext.append((char *)plaintext_s, sizeof(plaintext_s));
bufferlist cipher;
std::string error;
h->encrypt(secret, plaintext, cipher, error);
ASSERT_EQ(error, "");
- char want_cipher[] = {
+ unsigned char want_cipher[] = {
0xb3, 0x8f, 0x5b, 0xc9, 0x35, 0x4c, 0xf8, 0xc6,
0x13, 0x15, 0x66, 0x6f, 0x37, 0xd7, 0x79, 0x3a,
0x11, 0x90, 0x7b, 0xe9, 0xd8, 0x3c, 0x35, 0x70,
@@ -79,16 +79,16 @@ TEST(AES, Decrypt) {
};
bufferptr secret(secret_s, sizeof(secret_s));
- char cipher_s[] = {
+ unsigned char cipher_s[] = {
0xb3, 0x8f, 0x5b, 0xc9, 0x35, 0x4c, 0xf8, 0xc6,
0x13, 0x15, 0x66, 0x6f, 0x37, 0xd7, 0x79, 0x3a,
0x11, 0x90, 0x7b, 0xe9, 0xd8, 0x3c, 0x35, 0x70,
0x58, 0x7b, 0x97, 0x9b, 0x03, 0xd2, 0xa5, 0x01,
};
bufferlist cipher;
- cipher.append(cipher_s, sizeof(cipher_s));
+ cipher.append((char *)cipher_s, sizeof(cipher_s));
- char want_plaintext[] = {
+ unsigned char want_plaintext[] = {
0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff,
};
diff --git a/src/test/encoding/types.h b/src/test/encoding/types.h
index e3b7e305140..9ff7010a1d8 100644
--- a/src/test/encoding/types.h
+++ b/src/test/encoding/types.h
@@ -8,6 +8,9 @@ TYPE(filepath)
TYPE(SnapContext)
TYPE(SnapRealmInfo)
+#include "common/DecayCounter.h"
+TYPE(DecayCounter)
+
#include "common/LogEntry.h"
TYPE(LogEntryKey)
TYPE(LogEntry)
@@ -88,6 +91,84 @@ TYPE(MonCaps)
TYPE(DBObjectMap::_Header)
TYPE(DBObjectMap::State)
+#include "mds/Anchor.h"
+TYPE(Anchor)
+
+#include "mds/snap.h"
+TYPE(SnapInfo)
+TYPE(snaplink_t)
+TYPE(sr_t)
+
+#include "mds/mdstypes.h"
+TYPE(file_layout_policy_t)
+TYPE(frag_info_t)
+TYPE(nest_info_t)
+TYPE(client_writeable_range_t)
+TYPE(inode_t)
+TYPE(old_inode_t)
+TYPE(fnode_t)
+TYPE(old_rstat_t)
+TYPE(session_info_t)
+TYPE(string_snap_t)
+TYPE(MDSCacheObjectInfo)
+TYPE(mds_table_pending_t)
+TYPE(inode_load_vec_t)
+TYPE(dirfrag_load_vec_t)
+TYPE(mds_load_t)
+TYPE(cap_reconnect_t)
+
+#include "mds/MDSMap.h"
+TYPE_FEATUREFUL(MDSMap)
+TYPE_FEATUREFUL(MDSMap::mds_info_t)
+
+#include "mds/Capability.h"
+TYPE(Capability)
+
+#include "mds/AnchorServer.h"
+TYPE(AnchorServer)
+
+#include "mds/SessionMap.h"
+TYPE(SessionMap)
+
+#include "mds/events/ECommitted.h"
+TYPE(ECommitted)
+#include "mds/events/EExport.h"
+TYPE(EExport)
+#include "mds/events/EFragment.h"
+TYPE(EFragment)
+#include "mds/events/EImportFinish.h"
+TYPE(EImportFinish)
+#include "mds/events/EImportStart.h"
+TYPE(EImportStart)
+#include "mds/events/EMetaBlob.h"
+TYPE(EMetaBlob::fullbit)
+TYPE(EMetaBlob::remotebit)
+TYPE(EMetaBlob::nullbit)
+TYPE(EMetaBlob::dirlump)
+TYPE(EMetaBlob)
+#include "mds/events/EOpen.h"
+TYPE(EOpen)
+#include "mds/events/EResetJournal.h"
+TYPE(EResetJournal)
+#include "mds/events/ESession.h"
+TYPE(ESession)
+#include "mds/events/ESessions.h"
+TYPE(ESessions)
+#include "mds/events/ESlaveUpdate.h"
+TYPE(link_rollback)
+TYPE(rmdir_rollback)
+TYPE(rename_rollback::drec)
+TYPE(rename_rollback)
+TYPE(ESlaveUpdate)
+#include "mds/events/ESubtreeMap.h"
+TYPE(ESubtreeMap)
+#include "mds/events/ETableClient.h"
+TYPE(ETableClient)
+#include "mds/events/ETableServer.h"
+TYPE(ETableServer)
+#include "mds/events/EUpdate.h"
+TYPE(EUpdate)
+
#ifdef WITH_RADOSGW
#include "rgw/rgw_rados.h"
diff --git a/src/test/filestore/FileStoreTracker.cc b/src/test/filestore/FileStoreTracker.cc
index 2777a968704..afdc31bad23 100644
--- a/src/test/filestore/FileStoreTracker.cc
+++ b/src/test/filestore/FileStoreTracker.cc
@@ -52,7 +52,7 @@ int FileStoreTracker::init()
map<string, bufferlist> got;
db->get("STATUS", to_get, &got);
restart_seq = 0;
- if (got.size()) {
+ if (!got.empty()) {
bufferlist::iterator bp = got.begin()->second.begin();
::decode(restart_seq, bp);
}
@@ -240,7 +240,7 @@ ObjStatus get_obj_status(const pair<string, string> &obj,
map<string, bufferlist> got;
db->get(obj_to_meta_prefix(obj), to_get, &got);
ObjStatus retval;
- if (got.size()) {
+ if (!got.empty()) {
bufferlist::iterator bp = got.begin()->second.begin();
::decode(retval, bp);
}
@@ -357,7 +357,7 @@ ObjectContents FileStoreTracker::get_content(
map<string, bufferlist> got;
to_get.insert(seq_to_key(version));
db->get(obj_to_prefix(obj), to_get, &got);
- if (!got.size())
+ if (got.empty())
return ObjectContents();
pair<uint64_t, bufferlist> val;
bufferlist::iterator bp = got.begin()->second.begin();
diff --git a/src/test/filestore/TestFileStoreState.cc b/src/test/filestore/TestFileStoreState.cc
index 728d6e4c0ed..631d5294d9a 100644
--- a/src/test/filestore/TestFileStoreState.cc
+++ b/src/test/filestore/TestFileStoreState.cc
@@ -27,7 +27,7 @@
#define dout_subsys ceph_subsys_filestore
#undef dout_prefix
-#define dout_prefix *_dout << "test_filestore_state "
+#define dout_prefix *_dout << "ceph_test_filestore_state "
const coll_t TestFileStoreState::META_COLL("meta");
const coll_t TestFileStoreState::TEMP_COLL("temp");
@@ -229,7 +229,7 @@ hobject_t *TestFileStoreState::coll_entry_t::remove_obj_at(int pos, int *key)
hobject_t *TestFileStoreState::coll_entry_t::get_obj_at(int pos,
bool remove, int *key)
{
- if (!m_objects.size()) {
+ if (m_objects.empty()) {
dout(5) << "get_obj_at coll " << m_coll.to_str() << " pos " << pos
<< " in an empty collection" << dendl;
return NULL;
diff --git a/src/test/filestore/chain_xattr.cc b/src/test/filestore/chain_xattr.cc
new file mode 100644
index 00000000000..8346c02b2b1
--- /dev/null
+++ b/src/test/filestore/chain_xattr.cc
@@ -0,0 +1,217 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2013 Cloudwatt <libre.licensing@cloudwatt.com>
+ *
+ * Author: Loic Dachary <loic@dachary.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Library Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Library Public License for more details.
+ *
+ */
+
+#include <stdio.h>
+#include <signal.h>
+#include "os/chain_xattr.h"
+#include "include/Context.h"
+#include "common/errno.h"
+#include "common/ceph_argparse.h"
+#include "global/global_init.h"
+#include <gtest/gtest.h>
+
+// A value size that does not fit in a single xattr block. Parenthesized so
+// that the expansion stays one operand inside larger expressions.
+#define LARGE_BLOCK_LEN (CHAIN_XATTR_MAX_BLOCK_LEN + 1024)
+
+// Round trips values through the chain_*xattr functions, by path and by
+// file descriptor, and checks the error codes they report.
+TEST(chain_xattr, get_and_set) {
+  const char* file = "testfile";
+  ::unlink(file);
+  int fd = ::open(file, O_CREAT|O_WRONLY|O_TRUNC, 0700);
+  // every fd based check below is meaningless without a valid descriptor
+  ASSERT_NE(-1, fd);
+  const string user("user.");
+
+  // a value larger than one block can be set, sized, read and removed
+  {
+    const string name = user + string(CHAIN_XATTR_MAX_NAME_LEN - user.size(), '@');
+    const string x(LARGE_BLOCK_LEN, 'X');
+
+    {
+      char y[LARGE_BLOCK_LEN];
+      ASSERT_EQ(LARGE_BLOCK_LEN, chain_setxattr(file, name.c_str(), x.c_str(), LARGE_BLOCK_LEN));
+      ASSERT_EQ(LARGE_BLOCK_LEN, chain_getxattr(file, name.c_str(), 0, 0));
+      ASSERT_EQ(LARGE_BLOCK_LEN, chain_getxattr(file, name.c_str(), y, LARGE_BLOCK_LEN));
+      ASSERT_EQ(0, chain_removexattr(file, name.c_str()));
+      ASSERT_EQ(0, memcmp(x.c_str(), y, LARGE_BLOCK_LEN));
+    }
+
+    {
+      char y[LARGE_BLOCK_LEN];
+      ASSERT_EQ(LARGE_BLOCK_LEN, chain_fsetxattr(fd, name.c_str(), x.c_str(), LARGE_BLOCK_LEN));
+      ASSERT_EQ(LARGE_BLOCK_LEN, chain_fgetxattr(fd, name.c_str(), 0, 0));
+      ASSERT_EQ(LARGE_BLOCK_LEN, chain_fgetxattr(fd, name.c_str(), y, LARGE_BLOCK_LEN));
+      ASSERT_EQ(0, chain_fremovexattr(fd, name.c_str()));
+      ASSERT_EQ(0, memcmp(x.c_str(), y, LARGE_BLOCK_LEN));
+    }
+  }
+
+  //
+  // when chain_setxattr is used to store a value that is
+  // CHAIN_XATTR_MAX_BLOCK_LEN * 2 + 10 bytes long it will
+  //
+  // add user.foo => CHAIN_XATTR_MAX_BLOCK_LEN bytes
+  // add user.foo@1 => CHAIN_XATTR_MAX_BLOCK_LEN bytes
+  // add user.foo@2 => 10 bytes
+  //
+  // then ( no chain_removexattr in between ) when it is used to
+  // override with a value that is exactly CHAIN_XATTR_MAX_BLOCK_LEN
+  // bytes long it will
+  //
+  // replace user.foo => CHAIN_XATTR_MAX_BLOCK_LEN bytes
+  // remove user.foo@1 => CHAIN_XATTR_MAX_BLOCK_LEN bytes
+  // leak user.foo@2 => 10 bytes
+  //
+  // see http://marc.info/?l=ceph-devel&m=136027076615853&w=4 for the
+  // discussion
+  //
+  {
+    const string name = user + string(CHAIN_XATTR_MAX_NAME_LEN - user.size(), '@');
+    const string x(LARGE_BLOCK_LEN, 'X');
+
+    {
+      // sized after the length passed to chain_getxattr below: sizing it
+      // after CHAIN_XATTR_MAX_NAME_LEN would let the read overflow y
+      char y[CHAIN_XATTR_MAX_BLOCK_LEN];
+      ASSERT_EQ(LARGE_BLOCK_LEN, chain_setxattr(file, name.c_str(), x.c_str(), LARGE_BLOCK_LEN));
+      ASSERT_EQ(CHAIN_XATTR_MAX_BLOCK_LEN, chain_setxattr(file, name.c_str(), x.c_str(), CHAIN_XATTR_MAX_BLOCK_LEN));
+      ASSERT_EQ(CHAIN_XATTR_MAX_BLOCK_LEN, chain_getxattr(file, name.c_str(), 0, 0));
+      ASSERT_EQ(CHAIN_XATTR_MAX_BLOCK_LEN, chain_getxattr(file, name.c_str(), y, CHAIN_XATTR_MAX_BLOCK_LEN));
+      ASSERT_EQ(0, chain_removexattr(file, name.c_str()));
+      ASSERT_EQ(0, memcmp(x.c_str(), y, CHAIN_XATTR_MAX_BLOCK_LEN));
+    }
+
+    {
+      char y[CHAIN_XATTR_MAX_BLOCK_LEN];
+      ASSERT_EQ(LARGE_BLOCK_LEN, chain_fsetxattr(fd, name.c_str(), x.c_str(), LARGE_BLOCK_LEN));
+      ASSERT_EQ(CHAIN_XATTR_MAX_BLOCK_LEN, chain_fsetxattr(fd, name.c_str(), x.c_str(), CHAIN_XATTR_MAX_BLOCK_LEN));
+      ASSERT_EQ(CHAIN_XATTR_MAX_BLOCK_LEN, chain_fgetxattr(fd, name.c_str(), 0, 0));
+      ASSERT_EQ(CHAIN_XATTR_MAX_BLOCK_LEN, chain_fgetxattr(fd, name.c_str(), y, CHAIN_XATTR_MAX_BLOCK_LEN));
+      ASSERT_EQ(0, chain_fremovexattr(fd, name.c_str()));
+      ASSERT_EQ(0, memcmp(x.c_str(), y, CHAIN_XATTR_MAX_BLOCK_LEN));
+    }
+  }
+
+  // a missing file reports -ENOENT, an invalid descriptor -EBADF
+  {
+    int x = 0;
+    ASSERT_EQ(-ENOENT, chain_setxattr("UNLIKELY_TO_EXIST", "NAME", &x, sizeof(x)));
+    ASSERT_EQ(-ENOENT, chain_getxattr("UNLIKELY_TO_EXIST", "NAME", 0, 0));
+    ASSERT_EQ(-ENOENT, chain_getxattr("UNLIKELY_TO_EXIST", "NAME", &x, sizeof(x)));
+    ASSERT_EQ(-ENOENT, chain_removexattr("UNLIKELY_TO_EXIST", "NAME"));
+    int unlikely_to_be_a_valid_fd = 400;
+    ASSERT_EQ(-EBADF, chain_fsetxattr(unlikely_to_be_a_valid_fd, "NAME", &x, sizeof(x)));
+    ASSERT_EQ(-EBADF, chain_fgetxattr(unlikely_to_be_a_valid_fd, "NAME", 0, 0));
+    ASSERT_EQ(-EBADF, chain_fgetxattr(unlikely_to_be_a_valid_fd, "NAME", &x, sizeof(x)));
+    ASSERT_EQ(-EBADF, chain_fremovexattr(unlikely_to_be_a_valid_fd, "NAME"));
+  }
+
+  // a name that is too long triggers an assertion
+  {
+    // initialized because its address is handed over as the value to store
+    int x = 0;
+    const string name = user + string(CHAIN_XATTR_MAX_NAME_LEN * 2, '@');
+    ASSERT_THROW(chain_setxattr(file, name.c_str(), &x, sizeof(x)), FailedAssertion);
+    ASSERT_THROW(chain_fsetxattr(fd, name.c_str(), &x, sizeof(x)), FailedAssertion);
+  }
+
+  // reading into a buffer smaller than the value reports -ERANGE
+  {
+    const string name = user + string(CHAIN_XATTR_MAX_NAME_LEN - user.size(), '@');
+    const string x(LARGE_BLOCK_LEN, 'X');
+    {
+      char y[LARGE_BLOCK_LEN];
+      ASSERT_EQ(LARGE_BLOCK_LEN, chain_setxattr(file, name.c_str(), x.c_str(), LARGE_BLOCK_LEN));
+      ASSERT_EQ(-ERANGE, chain_getxattr(file, name.c_str(), y, LARGE_BLOCK_LEN - 1));
+      ASSERT_EQ(-ERANGE, chain_getxattr(file, name.c_str(), y, CHAIN_XATTR_MAX_BLOCK_LEN));
+      ASSERT_EQ(0, chain_removexattr(file, name.c_str()));
+    }
+
+    {
+      char y[LARGE_BLOCK_LEN];
+      ASSERT_EQ(LARGE_BLOCK_LEN, chain_fsetxattr(fd, name.c_str(), x.c_str(), LARGE_BLOCK_LEN));
+      ASSERT_EQ(-ERANGE, chain_fgetxattr(fd, name.c_str(), y, LARGE_BLOCK_LEN - 1));
+      ASSERT_EQ(-ERANGE, chain_fgetxattr(fd, name.c_str(), y, CHAIN_XATTR_MAX_BLOCK_LEN));
+      ASSERT_EQ(0, chain_fremovexattr(fd, name.c_str()));
+    }
+  }
+
+  ::close(fd);
+  ::unlink(file);
+}
+
+// Lists the attributes of a file, by path and by file descriptor, and
+// checks the error codes reported for invalid targets and short buffers.
+TEST(chain_xattr, listxattr) {
+  const char* file = "testfile";
+  ::unlink(file);
+  int fd = ::open(file, O_CREAT|O_WRONLY|O_TRUNC, 0700);
+  // every fd based check below is meaningless without a valid descriptor
+  ASSERT_NE(-1, fd);
+  const string user("user.");
+  const string name1 = user + string(CHAIN_XATTR_MAX_NAME_LEN - user.size(), '1');
+  const string name2 = user + string(CHAIN_XATTR_MAX_NAME_LEN - user.size(), '@');
+  const string x(LARGE_BLOCK_LEN, 'X');
+  const int y = 1234;
+
+  ASSERT_EQ(LARGE_BLOCK_LEN, chain_setxattr(file, name1.c_str(), x.c_str(), LARGE_BLOCK_LEN));
+  ASSERT_EQ((int)sizeof(y), chain_setxattr(file, name2.c_str(), &y, sizeof(y)));
+
+  //
+  // The expected listing is both names, each followed by a NUL byte.
+  // Owning containers are used instead of malloc/calloc so that nothing
+  // leaks when an ASSERT returns early.
+  //
+  string expected(name1);
+  expected.push_back('\0');
+  expected += name2;
+  expected.push_back('\0');
+  int buffer_size = expected.size();
+  vector<char> actual(buffer_size, '\0');
+
+  ASSERT_LT(buffer_size, chain_listxattr(file, NULL, 0)); // size evaluation is conservative
+  // verify the path based listing before the buffer is reused
+  chain_listxattr(file, &actual[0], buffer_size);
+  ASSERT_EQ(0, ::memcmp(expected.data(), &actual[0], buffer_size));
+  ::memset(&actual[0], '\0', buffer_size);
+  chain_flistxattr(fd, &actual[0], buffer_size);
+  ASSERT_EQ(0, ::memcmp(expected.data(), &actual[0], buffer_size));
+
+  int unlikely_to_be_a_valid_fd = 400;
+  ASSERT_GT(0, chain_listxattr("UNLIKELY_TO_EXIST", &actual[0], 0));
+  ASSERT_GT(0, chain_listxattr("UNLIKELY_TO_EXIST", &actual[0], buffer_size));
+  ASSERT_GT(0, chain_flistxattr(unlikely_to_be_a_valid_fd, &actual[0], 0));
+  ASSERT_GT(0, chain_flistxattr(unlikely_to_be_a_valid_fd, &actual[0], buffer_size));
+  ASSERT_EQ(-ERANGE, chain_listxattr(file, &actual[0], 1));
+  ASSERT_EQ(-ERANGE, chain_flistxattr(fd, &actual[0], 1));
+
+  ASSERT_EQ(0, chain_removexattr(file, name1.c_str()));
+  ASSERT_EQ(0, chain_removexattr(file, name2.c_str()));
+
+  ::close(fd);
+  ::unlink(file);
+}
+
+// Initializes the global Ceph context, probes the current directory for
+// working extended attributes and runs the tests only when the probe
+// succeeds.
+int main(int argc, char **argv) {
+  vector<const char*> args;
+  argv_to_vec(argc, (const char **)argv, args);
+
+  global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0);
+  common_init_finish(g_ceph_context);
+  g_ceph_context->_conf->set_val("err_to_stderr", "false");
+  g_ceph_context->_conf->set_val("log_to_stderr", "false");
+  g_ceph_context->_conf->apply_changes(NULL);
+
+  // probe: write an attribute to a scratch file and read it back
+  const char* probe_file = "testfile";
+  int written = 1234;
+  int read_back = 0;
+  int probe_fd = ::open(probe_file, O_CREAT|O_WRONLY|O_TRUNC, 0700);
+  int ret = ::ceph_os_fsetxattr(probe_fd, "user.test", &written, sizeof(written));
+  if (ret >= 0)
+    ret = ::ceph_os_fgetxattr(probe_fd, "user.test", &read_back, sizeof(read_back));
+  ::close(probe_fd);
+  ::unlink(probe_file);
+
+  const bool xattrs_work = (ret >= 0) && (written == read_back);
+  if (!xattrs_work) {
+    cerr << "SKIP all tests because extended attributes don't appear to work in the file system in which the tests are run: " << cpp_strerror(ret) << std::endl;
+    return 0;
+  }
+
+  ::testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
+
+// Local Variables:
+// compile-command: "cd ../.. ; make unittest_chain_xattr ; valgrind --tool=memcheck ./unittest_chain_xattr # --gtest_filter=chain_xattr.get_and_set"
+// End:
diff --git a/src/test/filestore/run_seed_to.sh b/src/test/filestore/run_seed_to.sh
index f79874e156e..fdf56141e12 100755
--- a/src/test/filestore/run_seed_to.sh
+++ b/src/test/filestore/run_seed_to.sh
@@ -1,7 +1,7 @@
#!/bin/bash
# vim: ts=8 sw=2 smarttab
#
-# run_seed_to.sh - Run test_filestore_idempotent_sequence up until an
+# run_seed_to.sh - Run ceph_test_filestore_idempotent_sequence up until an
# injection point, generating a sequence of operations based on a
# provided seed.
#
@@ -244,13 +244,13 @@ do
fi
do_rm $tmp_name_a $tmp_name_a.fail $tmp_name_a.recover
- $v test_filestore_idempotent_sequence run-sequence-to $to \
+ $v ceph_test_filestore_idempotent_sequence run-sequence-to $to \
$tmp_name_a $tmp_name_a/journal \
--filestore-xattr-use-omap --test-seed $seed --osd-journal-size 100 \
--filestore-kill-at $killat $tmp_opts_a \
--log-file $tmp_name_a.fail --debug-filestore 20 || true
- stop_at=`test_filestore_idempotent_sequence get-last-op \
+ stop_at=`ceph_test_filestore_idempotent_sequence get-last-op \
$tmp_name_a $tmp_name_a/journal \
--filestore-xattr-use-omap --log-file $tmp_name_a.recover \
--debug-filestore 20 --debug-journal 20`
@@ -263,12 +263,12 @@ do
echo stopped at $stop_at
do_rm $tmp_name_b $tmp_name_b.clean
- $v test_filestore_idempotent_sequence run-sequence-to \
+ $v ceph_test_filestore_idempotent_sequence run-sequence-to \
$stop_at $tmp_name_b $tmp_name_b/journal \
--filestore-xattr-use-omap --test-seed $seed --osd-journal-size 100 \
--log-file $tmp_name_b.clean --debug-filestore 20 $tmp_opts_b
- if $v test_filestore_idempotent_sequence diff \
+ if $v ceph_test_filestore_idempotent_sequence diff \
$tmp_name_a $tmp_name_a/journal $tmp_name_b $tmp_name_b/journal \
--filestore-xattr-use-omap; then
echo OK
diff --git a/src/test/mon/test_mon_workloadgen.cc b/src/test/mon/test_mon_workloadgen.cc
index 6c9d2bb65be..fcc69d85f0b 100644
--- a/src/test/mon/test_mon_workloadgen.cc
+++ b/src/test/mon/test_mon_workloadgen.cc
@@ -308,7 +308,7 @@ class OSDStub : public TestStub
boost::uniform_int<> mon_osd_rng;
utime_t last_boot_attempt;
- static const double STUB_BOOT_INTERVAL = 10.0;
+ static const double STUB_BOOT_INTERVAL;
public:
@@ -477,7 +477,7 @@ class OSDStub : public TestStub
}
void auto_create_pgs() {
- bool has_pgs = (pgs.size() > 0);
+ bool has_pgs = !pgs.empty();
dout(10) << __func__
<< ": " << (has_pgs ? "has pgs; ignore" : "create pgs") << dendl;
if (has_pgs)
@@ -585,7 +585,7 @@ class OSDStub : public TestStub
void modify_pgs() {
dout(10) << __func__ << dendl;
- if (pgs.size() == 0) {
+ if (pgs.empty()) {
dout(1) << __func__
<< " no pgs available! don't attempt to modify." << dendl;
return;
@@ -654,7 +654,7 @@ class OSDStub : public TestStub
dout(10) << __func__ << dendl;
modify_pgs();
- if (pgs_changes.size() > 0)
+ if (!pgs_changes.empty())
send_pg_stats();
monc.sub_want("osd_pg_creates", 0, CEPH_SUBSCRIBE_ONETIME);
monc.renew_subs();
@@ -902,6 +902,7 @@ class OSDStub : public TestStub
}
};
+double const OSDStub::STUB_BOOT_INTERVAL = 10.0;
#undef dout_prefix
#define dout_prefix *_dout << "main "
@@ -988,7 +989,7 @@ int main(int argc, const char *argv[])
global_init(&def_args, args,
CEPH_ENTITY_TYPE_OSD, CODE_ENVIRONMENT_UTILITY,
- CINIT_FLAG_NO_DEFAULT_CONFIG_FILE);
+ 0);
common_init_finish(g_ceph_context);
g_ceph_context->_conf->apply_changes(NULL);
@@ -1032,7 +1033,7 @@ int main(int argc, const char *argv[])
}
}
- if (stub_ids.size() == 0) {
+ if (stub_ids.empty()) {
std::cerr << "** error: must specify at least one '--stub-id <ID>'"
<< std::endl;
usage();
diff --git a/src/test/osd/RadosModel.h b/src/test/osd/RadosModel.h
index 21112d1067f..be9ff4bccd6 100644
--- a/src/test/osd/RadosModel.h
+++ b/src/test/osd/RadosModel.h
@@ -215,7 +215,7 @@ public:
state_lock.Lock();
TestOp *next = gen->next(*this);
- while (next || inflight.size()) {
+ while (next || !inflight.empty()) {
if (next) {
inflight.push_back(next);
}
@@ -235,7 +235,7 @@ public:
}
}
- if (inflight.size() >= (unsigned) max_in_flight || (!next && inflight.size())) {
+ if (inflight.size() >= (unsigned) max_in_flight || (!next && !inflight.empty())) {
cout << "Waiting on " << inflight.size() << std::endl;
wait();
} else {
@@ -499,7 +499,7 @@ public:
op.rmxattr(i->first.c_str());
}
}
- if (!to_remove.size()) {
+ if (to_remove.empty()) {
context->kick();
context->oid_in_use.erase(oid);
context->oid_not_in_use.insert(oid);
diff --git a/src/test/run-rbd-tests b/src/test/run-rbd-tests
index fe8fa4dd505..d3c8b9e98ca 100755
--- a/src/test/run-rbd-tests
+++ b/src/test/run-rbd-tests
@@ -26,11 +26,11 @@ run_api_tests() {
# skip many_snaps since it takes several minutes
# skip remove_with_watcher until #2533 is fixed
nosetests -v test_rbd -e '.*many_snaps' -e '.*remove_with_watcher'
- # test_librbd creates its own pools
- test_librbd
+ # ceph_test_librbd creates its own pools
+ ceph_test_librbd
}
-test_cls_rbd
+ceph_test_cls_rbd
run_api_tests
run_cli_tests
diff --git a/src/test/test_filejournal.cc b/src/test/test_filejournal.cc
index 5b7576dea39..7df5c806949 100644
--- a/src/test/test_filejournal.cc
+++ b/src/test/test_filejournal.cc
@@ -70,11 +70,11 @@ int main(int argc, char **argv) {
finisher = new Finisher(g_ceph_context);
- if (args.size()) {
+ if (!args.empty()) {
strcpy(path, args[0]);
} else {
srand(getpid()+time(0));
- snprintf(path, sizeof(path), "/tmp/test_filejournal.tmp.%d", rand());
+ snprintf(path, sizeof(path), "/tmp/ceph_test_filejournal.tmp.%d", rand());
}
cout << "path " << path << std::endl;
diff --git a/src/test/test_mutate.cc b/src/test/test_mutate.cc
index f2feda11d48..b9e0d717664 100644
--- a/src/test/test_mutate.cc
+++ b/src/test/test_mutate.cc
@@ -48,7 +48,7 @@ int main(int argc, const char **argv)
common_init_finish(g_ceph_context);
string val;
- string oid("test_object");
+ string oid("ceph_test_object");
string pool_name("test_pool");
for (std::vector<const char*>::iterator i = args.begin(); i != args.end(); ) {
if (ceph_argparse_double_dash(args, i)) {
diff --git a/src/tools/ceph-filestore-dump.cc b/src/tools/ceph-filestore-dump.cc
index 2dfa1e539ff..fbd227dc25f 100644
--- a/src/tools/ceph-filestore-dump.cc
+++ b/src/tools/ceph-filestore-dump.cc
@@ -167,14 +167,19 @@ int main(int argc, char **argv)
ObjectStore *fs = new FileStore(fspath, jpath);
- if (fs->mount() < 0) {
- cout << "mount failed" << std::endl;
+ int r = fs->mount();
+ if (r < 0) {
+ if (r == -EBUSY) {
+ cout << "OSD has the store locked" << std::endl;
+ } else {
+ cout << "Mount failed with '" << cpp_strerror(-r) << "'" << std::endl;
+ }
return 1;
}
bool found = false;
vector<coll_t> ls;
- int r = fs->list_collections(ls);
+ r = fs->list_collections(ls);
if (r < 0) {
cerr << "failed to list pgs: " << cpp_strerror(-r) << std::endl;
exit(1);
@@ -199,24 +204,29 @@ int main(int argc, char **argv)
continue;
}
+ //XXX: This needs OSD function to generate
+ hobject_t infos_oid(sobject_t("infos", CEPH_NOSNAP));
bufferlist bl;
- epoch_t map_epoch = PG::peek_map_epoch(fs, coll, &bl);
+ epoch_t map_epoch = PG::peek_map_epoch(fs, coll, infos_oid, &bl);
(void)map_epoch;
found = true;
- pg_info_t info;
+ pg_info_t info(pgid);
map<epoch_t,pg_interval_t> past_intervals;
hobject_t biginfo_oid = OSD::make_pg_biginfo_oid(pgid);
interval_set<snapid_t> snap_collections;
+ __u8 struct_v;
int r = PG::read_info(fs, coll, bl, info, past_intervals, biginfo_oid,
- snap_collections);
+ infos_oid, snap_collections, struct_v);
if (r < 0) {
cerr << "read_info error " << cpp_strerror(-r) << std::endl;
ret = 1;
continue;
}
+ if (vm.count("debug"))
+ cout << "struct_v " << (int)struct_v << std::endl;
if (type == "info") {
formatter->open_object_section("info");
diff --git a/src/tools/common.cc b/src/tools/common.cc
index 13aff67ac6e..514d33d902f 100644
--- a/src/tools/common.cc
+++ b/src/tools/common.cc
@@ -260,7 +260,7 @@ int do_command(CephToolCtx *ctx,
pending_tell_pgid = false;
reply = false;
- if (cmd.size() > 0 && cmd[0] == "tell") {
+ if (!cmd.empty() && cmd[0] == "tell") {
if (cmd.size() == 1) {
cerr << "no tell target specified" << std::endl;
return -EINVAL;
@@ -272,7 +272,7 @@ int do_command(CephToolCtx *ctx,
pending_cmd.erase(pending_cmd.begin(), pending_cmd.begin() + 2);
pending_tell = true;
}
- if (cmd.size() > 0 && cmd[0] == "pg") {
+ if (!cmd.empty() && cmd[0] == "pg") {
if (cmd.size() == 1) {
cerr << "pg requires at least one argument" << std::endl;
return -EINVAL;
@@ -580,7 +580,7 @@ bool Admin::ms_dispatch(Message *m) {
void Admin::ms_handle_connect(Connection *con) {
if (con->get_peer_type() == CEPH_ENTITY_TYPE_MON) {
ctx->lock.Lock();
- if (pending_cmd.size())
+ if (!pending_cmd.empty())
send_command(ctx);
ctx->lock.Unlock();
}
@@ -592,7 +592,7 @@ bool Admin::ms_handle_reset(Connection *con)
if (con == command_con) {
command_con->put();
command_con = NULL;
- if (pending_cmd.size())
+ if (!pending_cmd.empty())
send_command(ctx);
return true;
}
diff --git a/src/tools/rest_bench.cc b/src/tools/rest_bench.cc
index 8e8649d213e..9813ba3e12d 100644
--- a/src/tools/rest_bench.cc
+++ b/src/tools/rest_bench.cc
@@ -246,7 +246,7 @@ class RESTDispatcher {
}
void _dump_queue() {
deque<req_context *>::iterator iter;
- if (dispatcher->m_req_queue.size() == 0) {
+ if (dispatcher->m_req_queue.empty()) {
generic_dout(20) << "DispatcherWQ: empty" << dendl;
return;
}
@@ -735,7 +735,7 @@ int main(int argc, const char **argv)
cerr << "rest-bench: bucket not specified" << std::endl;
usage_exit();
}
- if (args.size() < 1)
+ if (args.empty())
usage_exit();
int operation = 0;
const char *prefix = NULL;
diff --git a/src/unittest_bufferlist.sh b/src/unittest_bufferlist.sh
new file mode 100755
index 00000000000..0f05afe07b7
--- /dev/null
+++ b/src/unittest_bufferlist.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+#
+# Ceph - scalable distributed file system
+#
+# Copyright (C) 2013 Cloudwatt <libre.licensing@cloudwatt.com>
+#
+# Author: Loic Dachary <loic@dachary.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+CEPH_BUFFER_TRACK=true ./unittest_bufferlist
diff --git a/src/upstart/ceph-hotplug.conf b/src/upstart/ceph-hotplug.conf
deleted file mode 100644
index 702045293a2..00000000000
--- a/src/upstart/ceph-hotplug.conf
+++ /dev/null
@@ -1,11 +0,0 @@
-description "Ceph hotplug"
-
-start on block-device-added \
- DEVTYPE=partition \
- ID_PART_ENTRY_TYPE=4fbd7e29-9d25-41b8-afd0-062c0ceff05d
-stop on runlevel [!2345]
-
-task
-instance $DEVNAME
-
-exec /usr/sbin/ceph-disk-activate --mount -- "$DEVNAME"
diff --git a/src/upstart/ceph-osd.conf b/src/upstart/ceph-osd.conf
index 23ca2eb2a23..ff05fdfc006 100644
--- a/src/upstart/ceph-osd.conf
+++ b/src/upstart/ceph-osd.conf
@@ -15,28 +15,31 @@ pre-start script
install -d -m0755 /var/run/ceph
- # update location in crush; put in some suitable defaults on the
- # command line, ceph.conf can override what it wants
- location="$(ceph-conf --cluster="${cluster:-ceph}" --name="osd.$id" --lookup osd_crush_location || :)"
- weight="$(ceph-conf --cluster="$cluster" --name="osd.$id" --lookup osd_crush_initial_weight || :)"
- ceph \
- --cluster="${cluster:-ceph}" \
- --name="osd.$id" \
- --keyring="/var/lib/ceph/osd/${cluster:-ceph}-$id/keyring" \
- osd crush create-or-move \
- -- \
- "$id" \
- "${weight:-1}" \
- root=default \
- host="$(hostname -s)" \
- $location \
- || :
+ update="$(ceph-conf --cluster=${cluster:-ceph} --name=osd.$id --lookup osd_crush_update_on_start || :)"
+ if [ "${update:-1}" = "1" -o "${update:-1}" = "true" ]; then # unset/empty defaults to 1 (update enabled)
+ # update location in crush; put in some suitable defaults on the
+ # command line, ceph.conf can override what it wants
+ location="$(ceph-conf --cluster=${cluster:-ceph} --name=osd.$id --lookup osd_crush_location || :)"
+ weight="$(ceph-conf --cluster=${cluster:-ceph} --name=osd.$id --lookup osd_crush_initial_weight || :)"
+ ceph \
+ --cluster="${cluster:-ceph}" \
+ --name="osd.$id" \
+ --keyring="/var/lib/ceph/osd/${cluster:-ceph}-$id/keyring" \
+ osd crush create-or-move \
+ -- \
+ "$id" \
+ "${weight:-1}" \
+ root=default \
+ host="$(hostname -s)" \
+ $location \
+ || :
+ fi
journal="/var/lib/ceph/osd/${cluster:-ceph}-$id/journal"
if [ -L "$journal" -a ! -e "$journal" ]; then
- echo "ceph-osd($UPSTART_INSTANCE): journal not present, not starting yet." 1>&2
- stop
- exit 0
+ echo "ceph-osd($UPSTART_INSTANCE): journal not present, not starting yet." 1>&2
+ stop
+ exit 0
fi
end script
diff --git a/udev/95-ceph-osd.rules b/udev/95-ceph-osd.rules
new file mode 100644
index 00000000000..a6fcaea8823
--- /dev/null
+++ b/udev/95-ceph-osd.rules
@@ -0,0 +1,21 @@
+# activate ceph-tagged partitions
+ACTION=="add", SUBSYSTEM=="block", \
+ ENV{DEVTYPE}=="partition", \
+ ENV{ID_PART_ENTRY_TYPE}=="4fbd7e29-9d25-41b8-afd0-062c0ceff05d", \
+ RUN+="/usr/sbin/ceph-disk-activate --mount /dev/$name"
+
+# Map journal if using dm-crypt
+ACTION=="add", SUBSYSTEM=="block", \
+ ENV{DEVTYPE}=="partition", \
+ ENV{ID_PART_ENTRY_TYPE}=="45b0969e-9b03-4f30-b4c6-5ec00ceff106", \
+ RUN+="/sbin/cryptsetup --key-file /etc/ceph/dmcrypt-keys/$env{ID_PART_ENTRY_UUID} --key-size 256 create $env{ID_PART_ENTRY_UUID} /dev/$name"
+
+# Map data device and
+# activate ceph-tagged partitions
+# for dm-crypted data devices
+ACTION=="add", SUBSYSTEM=="block", \
+ ENV{DEVTYPE}=="partition", \
+ ENV{ID_PART_ENTRY_TYPE}=="4fbd7e29-9d25-41b8-afd0-5ec00ceff05d", \
+ RUN+="/sbin/cryptsetup --key-file /etc/ceph/dmcrypt-keys/$env{ID_PART_ENTRY_UUID} --key-size 256 create $env{ID_PART_ENTRY_UUID} /dev/$name", \
+ RUN+="/bin/bash -c 'while [ ! -e /dev/mapper/$env{ID_PART_ENTRY_UUID} ];do sleep 1; done'", \
+ RUN+="/usr/sbin/ceph-disk-activate --mount /dev/mapper/$env{ID_PART_ENTRY_UUID}"
diff --git a/wireshark/ceph/packet-ceph.c b/wireshark/ceph/packet-ceph.c
index 5d2c702251a..4379fda0fe0 100644
--- a/wireshark/ceph/packet-ceph.c
+++ b/wireshark/ceph/packet-ceph.c
@@ -209,8 +209,8 @@ static gint ett_ceph_footer = -1;
const char *ceph_cap_op_name(int op)
{
- char* plop = malloc(16*sizeof(char));
- sprintf(plop,"%i",op);
+ char* plop;
+
switch (op) {
case CEPH_CAP_OP_GRANT: return "grant";
case CEPH_CAP_OP_REVOKE: return "revoke";
@@ -226,13 +226,17 @@ const char *ceph_cap_op_name(int op)
case CEPH_CAP_OP_RELEASE: return "release";
case CEPH_CAP_OP_RENEW: return "renew";
}
+
+ plop = malloc(16*sizeof(char));
+ sprintf(plop,"%i",op);
+
return plop;
}
const char *ceph_mds_op_name(int op)
{
- char* plop = malloc(16*sizeof(char));
- sprintf(plop,"%i",op);
+ char* plop;
+
switch (op) {
case CEPH_MDS_OP_LOOKUP: return "lookup";
case CEPH_MDS_OP_LOOKUPHASH: return "lookuphash";
@@ -261,6 +265,10 @@ const char *ceph_mds_op_name(int op)
case CEPH_MDS_OP_SETFILELOCK: return "setfilelock";
case CEPH_MDS_OP_GETFILELOCK: return "getfilelock";
}
+
+ plop = malloc(16*sizeof(char)); /* unknown op: fall back to its decimal value as text */
+ sprintf(plop,"%i",op); /* write into plop; it must not be used as the format string */
+
return plop;
}
@@ -478,7 +486,7 @@ void proto_register_ceph (void)
static guint32 dissect_sockaddr_in(tvbuff_t *tvb, proto_tree *tree, guint32 offset)
{
- proto_tree *ceph_sockaddr_tree = NULL;
+ proto_tree *ceph_sockaddr_tree;
proto_item *ceph_sub_item = NULL;
proto_item *ceph_item = proto_tree_get_parent(tree);
@@ -533,13 +541,14 @@ static guint32 dissect_ceph_fsid(tvbuff_t *tvb, proto_tree *tree, guint32 offset
fsid_dec = malloc(4*sizeof(guint32));
fsid = *(struct ceph_fsid *)tvb_get_ptr(tvb, offset, sizeof(struct ceph_fsid));
memcpy(fsid_dec,fsid.fsid,4*sizeof(guint32));
- proto_tree_add_text(tree, tvb, offset,sizeof(struct ceph_fsid), "fsid: %x-%x-%x-%x",
+ proto_tree_add_text(tree, tvb, offset, sizeof(struct ceph_fsid), "fsid: %x-%x-%x-%x",
ntohl(fsid_dec[0]),
ntohl(fsid_dec[1]),
ntohl(fsid_dec[2]),
ntohl(fsid_dec[3])
);
offset += sizeof(struct ceph_fsid);
+ free (fsid_dec);
return offset;
}
@@ -572,11 +581,11 @@ static guint32 dissect_ceph_footer(tvbuff_t *tvb, proto_tree *tree, guint32 offs
static guint32 dissect_ceph_client_connect(tvbuff_t *tvb, proto_tree *tree, guint32 offset)
{
- proto_tree *ceph_header_tree = NULL;
+ proto_tree *ceph_header_tree;
proto_item *ceph_sub_item = NULL;
proto_item *ceph_item = proto_tree_get_parent(tree);
struct ceph_msg_connect *msg;
- guint32 auth_len = 0;
+ guint32 auth_len;
offset = dissect_ceph_banner(tvb, tree, offset);
@@ -614,7 +623,7 @@ static guint32 dissect_ceph_client_connect(tvbuff_t *tvb, proto_tree *tree, guin
static guint32 dissect_ceph_server_connect(tvbuff_t *tvb, proto_tree *tree, guint32 offset)
{
- proto_tree *ceph_header_tree = NULL;
+ proto_tree *ceph_header_tree;
proto_item *ceph_sub_item = NULL;
proto_item *ceph_item = proto_tree_get_parent(tree);
struct ceph_msg_connect_reply *msg;
@@ -1084,7 +1093,7 @@ static guint32 dissect_ceph_front(tvbuff_t *tvb, packet_info *pinfo, proto_tree
static guint32 dissect_ceph_generic(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree, guint32 offset)
{
- proto_tree *ceph_header_tree = NULL;
+ proto_tree *ceph_header_tree;
proto_item *ceph_sub_item = NULL;
proto_item *ceph_item = proto_tree_get_parent(tree);
guint32 front_len, middle_len, data_len;
@@ -1094,12 +1103,12 @@ static guint32 dissect_ceph_generic(tvbuff_t *tvb, packet_info *pinfo, proto_tre
guint16 type;
guint64 seq;
struct ceph_msg_header *header;
- unsigned int data_crc = 0;
+ unsigned int data_crc = 0;
- tag = tvb_get_guint8(tvb, offset);
- hlen = ( tag == CEPH_MSGR_TAG_ACK ) ? ACK_MSG_SIZE:0;
- hlen += sizeof(struct ceph_msg_header);
- hlen++;
+ tag = tvb_get_guint8(tvb, offset);
+ hlen = ( tag == CEPH_MSGR_TAG_ACK ) ? ACK_MSG_SIZE:0;
+ hlen += sizeof(struct ceph_msg_header);
+ hlen++;
ceph_header_tree = proto_item_add_subtree(ceph_item, ett_ceph);