summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--COPYING5
-rw-r--r--ceph.spec.in3
-rw-r--r--configure.ac15
-rw-r--r--debian/control3
-rw-r--r--debian/copyright5
-rw-r--r--doc/changelog/v0.67.2.txt207
-rw-r--r--doc/dev/osd_internals/erasure_coding.rst5
-rw-r--r--doc/dev/osd_internals/erasure_coding/PGBackend-h.rst5
-rw-r--r--doc/dev/osd_internals/erasure_coding/developer_notes.rst21
-rw-r--r--doc/install/upgrading-ceph.rst57
-rw-r--r--doc/man/8/monmaptool.rst2
-rw-r--r--doc/rados/deployment/ceph-deploy-mon.rst7
-rw-r--r--doc/release-notes.rst24
-rwxr-xr-xqa/workunits/suites/fsstress.sh15
-rw-r--r--src/Makefile.am51
-rw-r--r--src/arch/intel.c46
-rw-r--r--src/arch/intel.h16
-rw-r--r--src/arch/probe.cc20
-rw-r--r--src/arch/probe.h16
-rw-r--r--src/common/config_opts.h6
-rw-r--r--src/common/crc32c-intel.c113
-rw-r--r--src/common/crc32c.cc41
-rw-r--r--src/common/crc32c_intel_baseline.c126
-rw-r--r--src/common/crc32c_intel_baseline.h14
-rw-r--r--src/common/crc32c_intel_fast.c30
-rw-r--r--src/common/crc32c_intel_fast.h28
-rw-r--r--src/common/crc32c_intel_fast_asm.S664
-rw-r--r--src/common/sctp_crc32.c2
-rw-r--r--src/common/sctp_crc32.h14
-rw-r--r--src/common/sharedptr_registry.hpp20
-rw-r--r--src/include/buffer.h2
-rw-r--r--src/include/crc32c.h30
-rw-r--r--src/init-ceph.in4
-rwxr-xr-xsrc/init-rbdmap2
-rw-r--r--src/librados-config.cc3
-rw-r--r--src/librados/AioCompletionImpl.h22
-rw-r--r--src/mds/MDCache.cc2
-rw-r--r--src/mds/flock.cc5
-rw-r--r--src/mon/MonCap.cc2
-rw-r--r--src/mon/MonCommands.h2
-rw-r--r--src/mon/Monitor.cc1
-rw-r--r--src/mon/PGMap.cc13
-rw-r--r--src/mon/PGMap.h1
-rw-r--r--src/mon/PGMonitor.cc5
-rw-r--r--src/mon/Paxos.cc2
-rw-r--r--src/msg/Message.h4
-rw-r--r--src/msg/Pipe.cc8
-rw-r--r--src/os/BtrfsFileStoreBackend.cc29
-rw-r--r--src/osd/OSD.cc3
-rw-r--r--src/osd/OSDCap.cc2
-rw-r--r--src/osd/PG.cc3
-rw-r--r--src/osd/PG.h9
-rw-r--r--src/osd/ReplicatedPG.cc338
-rw-r--r--src/osd/ReplicatedPG.h105
-rw-r--r--src/osd/Watch.cc18
-rw-r--r--src/osd/Watch.h10
-rw-r--r--src/osd/osd_types.h29
-rw-r--r--src/osdc/Objecter.cc17
-rw-r--r--src/rgw/rgw_admin.cc4
-rw-r--r--src/rgw/rgw_cache.h9
-rw-r--r--src/rgw/rgw_rados.cc11
-rw-r--r--src/rgw/rgw_rados.h1
-rw-r--r--src/test/cli/radosgw-admin/help.t1
-rw-r--r--src/test/mon/moncap.cc1
-rw-r--r--src/test/osd/osdcap.cc1
-rw-r--r--src/test/test_osd_types.cc9
-rw-r--r--src/tools/ceph-monstore-tool.cc3
-rwxr-xr-xsrc/yasm-wrapper38
68 files changed, 1812 insertions, 518 deletions
diff --git a/COPYING b/COPYING
index 5e18e66bf5a..28d88ebb7fa 100644
--- a/COPYING
+++ b/COPYING
@@ -26,6 +26,11 @@ Files: m4/acx_pthread.m4
Copyright: Steven G. Johnson <stevenj@alum.mit.edu>
License: GPLWithACException
+Files: src/common/crc32c_intel*:
+Copyright:
+ Copyright 2012-2013 Intel Corporation All Rights Reserved.
+License: BSD 3-clause
+
Files: src/common/sctp_crc32.c:
Copyright:
Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
diff --git a/ceph.spec.in b/ceph.spec.in
index 82c9d073980..11a962d8bdc 100644
--- a/ceph.spec.in
+++ b/ceph.spec.in
@@ -42,6 +42,7 @@ BuildRequires: libcurl-devel
BuildRequires: libxml2-devel
BuildRequires: libuuid-devel
BuildRequires: leveldb-devel > 1.2
+BuildRequires: yasm
%if 0%{?rhel_version} || 0%{?centos_version} || 0%{?fedora}
BuildRequires: snappy-devel
%endif
@@ -249,7 +250,7 @@ BuildRequires: junit
%description -n cephfs-java
This package contains the Java libraries for the Ceph File System.
-%if (0%{?centos} || 0%{?opensuse} || 0%{?suse_version})
+%if 0%{?opensuse} || 0%{?suse_version}
%debug_package
%endif
diff --git a/configure.ac b/configure.ac
index 6f3cc6a9f5f..5dba606cf3c 100644
--- a/configure.ac
+++ b/configure.ac
@@ -33,6 +33,8 @@ AC_CANONICAL_TARGET
AM_INIT_AUTOMAKE
AM_PROG_CC_C_O
AM_PROG_LIBTOOL
+AM_PROG_AS
+
# enable make V=0 (if automake >1.11)
AM_INIT_AUTOMAKE([foreign])
@@ -68,6 +70,19 @@ AC_PROG_LIBTOOL
AC_SUBST(AM_CXXFLAGS)
AM_CXXFLAGS="${AM_CXXFLAGS}"
+# Check for yasm
+if yasm -f elf64 src/common/crc32c_intel_fast_asm.S -o /dev/null; then
+ echo 'we have a modern and working yasm'
+ if test `arch` = "x86_64"; then
+ echo 'we are x86_64'
+ AC_DEFINE([HAVE_GOOD_YASM_ELF64], [1], [we have a recent yasm and are x86_64])
+ with_good_yasm=yes
+ fi
+else
+ echo 'we do not have a modern/working yasm'
+fi
+AM_CONDITIONAL(WITH_GOOD_YASM_ELF64, test "$with_good_yasm" = "yes")
+
# Checks for compiler warning types
# AC_CHECK_CC_FLAG(FLAG_TO_TEST, VARIABLE_TO_SET_IF_SUPPORTED)
diff --git a/debian/control b/debian/control
index 195cb37fe62..44ee725efd4 100644
--- a/debian/control
+++ b/debian/control
@@ -34,7 +34,8 @@ Build-Depends: autoconf,
libxml2-dev,
pkg-config,
python (>= 2.6.6-3~),
- uuid-dev
+ uuid-dev,
+ yasm
Standards-Version: 3.9.3
Package: ceph
diff --git a/debian/copyright b/debian/copyright
index 0dc2160ae9c..aa91a149853 100644
--- a/debian/copyright
+++ b/debian/copyright
@@ -27,6 +27,11 @@ Files: m4/acx_pthread.m4
Copyright: Steven G. Johnson <stevenj@alum.mit.edu>
License: GPLWithACException
+Files: src/common/crc32c_intel*:
+Copyright:
+ Copyright 2012-2013 Intel Corporation All Rights Reserved.
+License: BSD 3-clause
+
Files: src/common/sctp_crc32.c:
Copyright:
Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
diff --git a/doc/changelog/v0.67.2.txt b/doc/changelog/v0.67.2.txt
new file mode 100644
index 00000000000..e5d0f3d5e5b
--- /dev/null
+++ b/doc/changelog/v0.67.2.txt
@@ -0,0 +1,207 @@
+commit eb4380dd036a0b644c6283869911d615ed729ac8
+Author: Gary Lowell <gary.lowell@inktank.com>
+Date: Thu Aug 22 19:10:55 2013 -0700
+
+ v0.67.2
+
+commit 242e43dae5b7c935b8f92c09e8dfe4704ba13787
+Author: Sage Weil <sage@inktank.com>
+Date: Fri Aug 9 12:49:57 2013 -0700
+
+ .gitignore: ignore test-driver
+
+ Signed-off-by: Sage Weil <sage@inktank.com>
+ (cherry picked from commit edf2c3449ec96d91d3d7ad01c50f7a79b7b2f7cc)
+
+commit 88aef702fb77c0a176caf37646a11ef480621412
+Author: Sage Weil <sage@inktank.com>
+Date: Fri Aug 9 12:42:49 2013 -0700
+
+ fuse: fix warning when compiled against old fuse versions
+
+ client/fuse_ll.cc: In function 'void invalidate_cb(void*, vinodeno_t, int64_t, int64_t)':
+ warning: client/fuse_ll.cc:540: unused variable 'fino'
+
+ Signed-off-by: Sage Weil <sage@inktank.com>
+ (cherry picked from commit 9833e9dabe010e538cb98c51d79b6df58ce28f9e)
+
+commit 48e104c9486f7a532455df108dbc225c00796097
+Author: Sage Weil <sage@inktank.com>
+Date: Fri Aug 9 12:40:34 2013 -0700
+
+ json_spirit: remove unused typedef
+
+ In file included from json_spirit/json_spirit_writer.cpp:7:0:
+ json_spirit/json_spirit_writer_template.h: In function 'String_type json_spirit::non_printable_to_string(unsigned int)':
+ json_spirit/json_spirit_writer_template.h:37:50: warning: typedef 'Char_type' locally defined but not used [-Wunused-local-typedefs]
+ typedef typename String_type::value_type Char_type;
+
+ (Also, ha ha, this file uses \r\n.)
+
+ Signed-off-by: Sage Weil <sage@inktank.com>
+ (cherry picked from commit 6abae35a3952e5b513895267711fea63ff3bad09)
+
+commit ae42619ca710d737bf4d8c63f39d1102326c903c
+Author: Sage Weil <sage@inktank.com>
+Date: Fri Aug 9 12:31:41 2013 -0700
+
+ gtest: add build-aux/test-driver to .gitignore
+
+ Signed-off-by: Sage Weil <sage@inktank.com>
+ (cherry picked from commit c9cdd19d1cd88b84e8a867f5ab85cb51fdc6f8e4)
+
+commit 2c122be08db2f233d66214eb804734ae45646084
+Author: Josh Durgin <josh.durgin@inktank.com>
+Date: Wed Aug 21 14:28:49 2013 -0700
+
+ objecter: resend unfinished lingers when osdmap is no longer paused
+
+ Plain Ops that haven't finished yet need to be resent if the osdmap
+ transitions from full or paused to unpaused. If these Ops are
+ triggered by LingerOps, they will be cancelled instead (since
+ should_resend = false), but the LingerOps that triggered them will not
+ be resent.
+
+ Fix this by checking the registered flag for all linger ops, and
+ resending any of them that aren't paused anymore.
+
+ Fixes: #6070
+ Signed-off-by: Josh Durgin <josh.durgin@inktank.com>
+ Reviewed-by: Sage Weil <sage.weil@inktank.com>
+ (cherry picked from commit 38a0ca66a79af4b541e6322467ae3a8a4483cc72)
+
+commit f6fe74ff51f679e7245b02462822d9ef1e15d28c
+Author: Sage Weil <sage@inktank.com>
+Date: Tue Aug 20 11:23:46 2013 -0700
+
+ pybind: fix Rados.conf_parse_env test
+
+ This happens after we connect, which means we get ENOSYS always.
+ Instead, parse_env inside the normal setup method, which had the added
+ benefit of being able to debug these tests.
+
+ Backport: dumpling
+ Signed-off-by: Sage Weil <sage@inktank.com>
+ (cherry picked from commit 6ef1970340c57d6e02f947348fb38882b51d131c)
+
+commit 47c89497b7f69cbf1557cd05b89837c388e2ba2f
+Author: Sage Weil <sage@inktank.com>
+Date: Tue Aug 13 13:14:59 2013 -0700
+
+ librados: fix MWatchNotify leak
+
+ Do not leak the message if the watcher is not registered. Also, simplify
+ this block.
+
+ Fixes (part of): #5949
+ Backport: dumpling, cuttlefish
+ Signed-off-by: Sage Weil <sage@inktank.com>
+ Reviewed-by: Yehuda Sadeh <yehuda@inktank.com>
+ (cherry picked from commit 6f5d8036f3e70c5e30edf7e36fb8ff9a56197f60)
+
+commit b3a9a8c4e5edff5431d8da71033047eced6bf985
+Author: Samuel Just <sam.just@inktank.com>
+Date: Mon Aug 19 17:23:44 2013 -0700
+
+ PG: remove old log when we upgrade log version
+
+ Otherwise the log_oid will be non-empty and the next
+ boot will cause us to try to upgrade again.
+
+ Fixes: #6057
+ Signed-off-by: Samuel Just <sam.just@inktank.com>
+ Reviewed-by: Sage Weil <sage@inktank.com>
+ (cherry picked from commit 1f851cb2489a95526de932ec6734ebf413e43490)
+
+commit c6005ccbaa482c62d7a6cbb387bdcf17f0e308d5
+Author: Samuel Just <sam.just@inktank.com>
+Date: Mon Aug 19 00:02:24 2013 -0700
+
+ PGLog: add a config to disable PGLog::check()
+
+ This is a debug check which may be causing excessive
+ cpu usage.
+
+ Reviewed-by: Sage Weil <sage@inktank.com>
+ Signed-off-by: Samuel Just <sam.just@inktank.com>
+ (cherry picked from commit 00080d785f6695b800f71317a3048a21064e61cb)
+
+commit 96d719eeecceaa06078a29c2f868e50e6bc9ab31
+Author: Sage Weil <sage@inktank.com>
+Date: Mon Aug 19 12:48:50 2013 -0700
+
+ ceph: parse CEPH_ARGS environment variable
+
+ Fixes: #6052
+ Signed-off-by: Sage Weil <sage@inktank.com>
+ Reviewed-by: Dan Mick <dan.mick@inktank.com>
+ (cherry picked from commit 67a95b9880c9bc6e858150352318d68d64ed74ad)
+
+commit d348cf5d135d099fe0490c1519196cd83a04831e
+Author: Sage Weil <sage@inktank.com>
+Date: Mon Aug 19 12:48:40 2013 -0700
+
+ rados pybind: add conf_parse_env()
+
+ Signed-off-by: Sage Weil <sage@inktank.com>
+ Reviewed-by: Dan Mick <dan.mick@inktank.com>
+ (cherry picked from commit eef7cacdb19313907a9367187b742db5382ee584)
+
+commit 290bcd8a718887eb0e28aa2d97bceeee79068ea9
+Author: Yehuda Sadeh <yehuda@inktank.com>
+Date: Tue Aug 13 13:16:07 2013 -0700
+
+ rgw: drain requests before exiting
+
+ Fixes: #5953
+
+ Signed-off-by: Yehuda Sadeh <yehuda@inktank.com>
+ Reviewed-by: Sage Weil <sage@inktank.com>
+ (cherry picked from commit 3cbf6a7b031c2ce8072733c5c0b7ceb53fdcb090)
+
+commit 863df08a43dff99797453040eb1ef6071b0432f9
+Author: Sage Weil <sage@inktank.com>
+Date: Tue Aug 13 11:16:17 2013 -0700
+
+ rgw: do not leak handler in get_handler() error path
+
+ If we fail to initialize, delete the handler.
+
+ Fixes (part of): #5949
+ Signed-off-by: Sage Weil <sage@inktank.com>
+ Reviewed-by: Yehuda Sadeh <yehuda@inktank.com>
+ (cherry picked from commit 810c52de36719c3ee6cf2bdf59d5cde8840bbe55)
+
+commit 9ac003f793b6cc72059110aac44014ddf2372bee
+Author: Sage Weil <sage@inktank.com>
+Date: Fri Jul 26 23:20:54 2013 -0700
+
+ rgw: fix leak of RGWDataChangesLog::renew_thread
+
+ Signed-off-by: Sage Weil <sage@inktank.com>
+ (cherry picked from commit 25948319c4d256c4aeb0137eb88947e54d14cc79)
+
+commit 89cd9dc403e97b4bd08920fbb5d6e2b8b9b7dac2
+Author: Sage Weil <sage@inktank.com>
+Date: Fri Jul 26 23:17:10 2013 -0700
+
+ rgw: free resolver on shutdown
+
+ Signed-off-by: Sage Weil <sage@inktank.com>
+ (cherry picked from commit a31356338b8ae55df59d829d9080ffad70b97d10)
+
+commit 5b26ca7fa5beb87cbbe6bbb26d70789ff2aa7661
+Author: Sage Weil <sage@inktank.com>
+Date: Fri Jul 26 23:22:20 2013 -0700
+
+ rgw: fix up signal handling
+
+ OMG libfcgi is annoying with shutdown and signals. You need to close
+ the fd *and* resend a signal to ensure that you kick the accept loop
+ hard enough to make it shut down.
+
+ Document this, and switch to the async signal handlers. Put them
+ tightly around the runtime loop as we do with other daemons.
+
+ Signed-off-by: Sage Weil <sage@inktank.com>
+ (cherry picked from commit 59b13cebee600dad2551d2c7dc3482b05eaf8b22)
diff --git a/doc/dev/osd_internals/erasure_coding.rst b/doc/dev/osd_internals/erasure_coding.rst
index bfc425251a8..cc1efe4b4bf 100644
--- a/doc/dev/osd_internals/erasure_coding.rst
+++ b/doc/dev/osd_internals/erasure_coding.rst
@@ -27,7 +27,8 @@ Glossary
Example:
::
- OSD 40 OSD 33
+
+ OSD 40 OSD 33
+-------------------------+ +-------------------------+
| shard 0 - PG 10 | | shard 1 - PG 10 |
|+------ object O -------+| |+------ object O -------+|
@@ -54,4 +55,4 @@ Table of content
High level design document <erasure_coding/pgbackend>
Developer notes <erasure_coding/developer_notes>
-
+ Draft PGBackend.h header <erasure_coding/PGBackend-h>
diff --git a/doc/dev/osd_internals/erasure_coding/PGBackend-h.rst b/doc/dev/osd_internals/erasure_coding/PGBackend-h.rst
index 7e1998382a0..b39cdb0e88e 100644
--- a/doc/dev/osd_internals/erasure_coding/PGBackend-h.rst
+++ b/doc/dev/osd_internals/erasure_coding/PGBackend-h.rst
@@ -1,5 +1,10 @@
+===========
PGBackend.h
+===========
+
+Work in progress:
::
+
/**
* PGBackend
*
diff --git a/doc/dev/osd_internals/erasure_coding/developer_notes.rst b/doc/dev/osd_internals/erasure_coding/developer_notes.rst
index 496a4a99f76..d542fdb86e2 100644
--- a/doc/dev/osd_internals/erasure_coding/developer_notes.rst
+++ b/doc/dev/osd_internals/erasure_coding/developer_notes.rst
@@ -31,6 +31,7 @@ is stored as an attribute of the object. The chunk *1* contains *ABC*
and is stored on *OSD5*, the chunk *4* contains *XYY* and is stored on
*OSD3*.
::
+
+-------------------+
name | NYAN |
+-------------------+
@@ -82,6 +83,7 @@ could not be read because the *OSD4* is *out*. The decoding function
is called as soon as three chunks are read : *OSD2* was the slowest
and its chunk was not taken into account.
::
+
+-------------------+
name | NYAN |
+-------------------+
@@ -132,6 +134,7 @@ the payload into M+K chunks and send them to the OSDs in the acting
set. It is also responsible for maintaining an authoritative version
of the placement group logs.
::
+
primary
+---OSD 1---+
| log |
@@ -155,6 +158,7 @@ of the placement group logs.
An erasure coded placement group has been created with M = 2 + K = 1 and is supported by three OSDs, two for M and one for K. The acting set of the placement group is made of *OSD 1* *OSD 2* and *OSD 3*. An object has been encoded and stored in the OSDs : the chunk D1v1 (i.e. Data chunk number 1 version 1) is on *OSD 1*, D2v1 on *OSD 2* and P1v1 (i.e. Parity chunk number 1 version 1) on *OSD 3*. The placement group logs on each OSD are in synch at epoch 1 version 1 (i.e. 1,1).
::
+
primary
+---OSD 1---+
|+----+ log |
@@ -180,6 +184,7 @@ An erasure coded placement group has been created with M = 2 + K = 1 and is supp
*OSD 1* is the primary and receives a WRITE FULL from a client, meaning the payload is to replace the content of the object entirely, it is not a partial write that would only overwrite part of it. The version two of the object is created to override the version one. *OSD 1* encodes the payload into three chunks : D1v2 (i.e. Data chunk number 1 version 2) will be on *OSD 1*, D2v2 on *OSD 2* and P1v2 (i.e. Parity chunk number 1 version 2) on *OSD 3*. Each chunk is sent to the target OSD, including the primary OSD which is responsible for storing chunks in addition to handling write operations and maintaining an authoritative version of the placement group logs. When an OSD receives the message instructing it to write the chunk, it also creates a new entry in the placement group logs to reflect the change. For instance, as soon as *OSD 3* stores *P1v2*, it adds the entry 1,2 ( i.e. epoch 1, version 2 ) to its logs. Because the OSDs work asynchronously, some chunks may still be in flight ( such as *D2v2* ) while others are acknowledged and on disk ( such as *P1v1* and *D1v1* ).
::
+
primary
+---OSD 1---+
|+----+ log |
@@ -208,6 +213,7 @@ An erasure coded placement group has been created with M = 2 + K = 1 and is supp
If all goes well, the chunks are acknowledged on each OSD in the acting set and the *last_complete* pointer of the logs can move from *1,1* to *1,2* and the files used to store the chunks of the previous version of the object can be removed : *D1v1* on *OSD 1*, *D2v1* on *OSD 2* and *P1v1* on *OSD 3*.
::
+
+---OSD 1---+
| |
| DOWN |
@@ -234,6 +240,7 @@ If all goes well, the chunks are acknowledged on each OSD in the acting set and
But accidents happen. If *OSD 1* goes down while *D2v2* is still in flight, the version 2 of the object is partially written : *OSD 3* has one chunk but does not have enough to recover. It lost two chunks : *D1v2* and *D2v2* but the erasure coding parameters M = 2 + K = 1 requires that at least two chunks are available to rebuild the third. *OSD 4* becomes the new primary and finds that the *last_complete* log entry ( i.e. all objects before this entry were known to be available on all OSDs in the previous acting set ) is *1,1* and will be the head of the new authoritative log.
::
+
+---OSD 2---+
|+----+ log |
||D2v1| 1,1 |
@@ -252,6 +259,7 @@ But accidents happen. If *OSD 1* goes down while *D2v2* is still in flight, the
The log entry *1,2* found on *OSD 3* is divergent from the new authoritative log provided by *OSD 4* : it is discarded and the file containing the *P1v2* chunk is removed.
::
+
+---OSD 2---+
|+----+ log |
||D2v1| 1,1 |
@@ -275,6 +283,7 @@ Interrupted append
An object is coded in stripes as described above. In the case of a full write, and assuming the object size is not too large to encode it in memory, there is a single stripe. When appending to an existing object, the stripe size is retrieved from the attributes of the object and if the total size of the object is a multiple of the stripe size and the payload of the append message is lower or equal to the strip size, the following applies. It applies, for instance, when *rgw* writes an object with sequence of append instead of a single write.
::
+
primary
+---OSD 1---+
|+-s1-+ log |
@@ -298,6 +307,7 @@ An object is coded in stripes as described above. In the case of a full write, a
*OSD 1* is the primary and receives an APPEND from a client, meaning the payload is to be appended at the end of the object. *OSD 1* encodes the payload into three chunks : S2D1 (i.e. Stripe two data chunk number 1 ) will be in s1 ( shard 1 ) on *OSD 1*, S2D2 in s2 on *OSD 2* and S2P1 (i.e. Stripe two parity chunk number 1 ) in s3 on *OSD 3*. Each chunk is sent to the target OSD, including the primary OSD which is responsible for storing chunks in addition to handling write operations and maintaining an authoritative version of the placement group logs. When an OSD receives the message instructing it to write the chunk, it also creates a new entry in the placement group logs to reflect the change. For instance, as soon as *OSD 3* stores *S2P1*, it adds the entry 1,2 ( i.e. epoch 1, version 2 ) to its logs. The log entry also carries the nature of the operation: in this case 1,2 is an APPEND where 1,1 was a CREATE. Because the OSDs work asynchronously, some chunks may still be in flight ( such as *S2D2* ) while others are acknowledged and on disk ( such as *S2D1* and *S2P1* ).
::
+
+---OSD 1---+
| |
| DOWN |
@@ -323,6 +333,7 @@ An object is coded in stripes as described above. In the case of a full write, a
If *OSD 1* goes down while *S2D2* is still in flight, the payload is partially appended : s3 ( shard 3) in *OSD 3* has one chunk but does not have enough to recover because s1 and s2 don't have it. It lost two chunks : *S2D1* and *S2D2* but the erasure coding parameters M = 2 + K = 1 requires that at least two chunks are available to rebuild the third. *OSD 4* becomes the new primary and finds that the *last_complete* log entry ( i.e. all objects before this entry were known to be available on all OSDs in the previous acting set ) is *1,1* and will be the head of the new authoritative log.
::
+
+---OSD 2---+
|+-s2-+ log |
||S1D2| 1,1 |
@@ -341,9 +352,11 @@ If *OSD 1* goes down while *S2D2* is still in flight, the payload is partially a
The log entry *1,2* found on *OSD 3* is divergent from the new authoritative log provided by *OSD 4* : it is discarded and the file containing the *S2P1* chunk is truncated to the nearest multiple of the stripe size.
-`Erasure code library <http://tracker.ceph.com/issues/5878>`_
+Erasure code library
--------------------
+See also `the corresponding tracker issue <http://tracker.ceph.com/issues/5878>`_
+
Using `Reed-Solomon <https://en.wikipedia.org/wiki/Reed_Solomon>`_,
with parameters M+K object O is encoded by dividing it into chunks O1,
O2, ... OM and computing parity chunks P1, P2, ... PK. Any M chunks
@@ -377,6 +390,7 @@ Although Reed-Solomon is provided as a default, Ceph uses it via an
abstract API designed to allow each pool to choose the plugin that
implements it.
::
+
ceph osd pool create <pool> \
erasure-code-directory=<dir> \
erasure-code-plugin=<plugin>
@@ -387,12 +401,14 @@ The *<plugin>* is dynamically loaded from *<dir>* (defaults to
which is responsible for registering an object derived from
*ErasureCodePlugin* in the registry singleton :
::
+
registry.plugins[plugin_name] = new ErasureCodePluginExample();
The *ErasureCodePlugin* derived object must provide a factory method
from which the concrete implementation of the *ErasureCodeInterface*
object can be generated:
::
+
virtual int factory(ErasureCodeInterfaceRef *erasure_code,
const map<std::string,std::string> &parameters) {
*erasure_code = ErasureCodeInterfaceRef(new ErasureCodeExample(parameters));
@@ -402,6 +418,7 @@ object can be generated:
The *parameters* is the list of *key=value* pairs that were set when the pool
was created. Each *key* must be prefixed with erasure-code to avoid name collisions
::
+
ceph osd pool create <pool> \
erasure-code-directory=<dir> \ # mandatory
erasure-code-plugin=jerasure \ # mandatory
@@ -419,6 +436,7 @@ Erasure code jerasure plugin
The parameters interpreted by the jerasure plugin are:
::
+
ceph osd pool create <pool> \
erasure-code-directory=<dir> \ # plugin directory absolute path
erasure-code-plugin=jerasure \ # plugin name (only jerasure)
@@ -507,6 +525,7 @@ require to encode the first object and not all of them.
Objects can be further divided into stripes to reduce the overhead of
partial writes. For instance:
::
+
+-----------------------+
|+---------------------+|
|| stripe 0 ||
diff --git a/doc/install/upgrading-ceph.rst b/doc/install/upgrading-ceph.rst
index 839f315bcd4..6020cb6a237 100644
--- a/doc/install/upgrading-ceph.rst
+++ b/doc/install/upgrading-ceph.rst
@@ -202,19 +202,13 @@ Multi-MDS configurations with identical names must be adjusted accordingly to
give daemons unique names. If you run your cluster with one metadata server,
you can disregard this notice for now.
+
ceph-deploy
-----------
-The ceph-deploy tool is now the preferred method of provisioning new
-clusters. For existing clusters created via mkcephfs that would like
-to transition to the new tool, there is a migration path, documented
-at `Transitioning to ceph-deploy`_. Note that transitioning to
-ceph-deploy is not required; it is entirely acceptable to continue
-provisioning new OSDs and monitors using the previous methods.
-However, ceph-deploy streamlines these processes significantly.
-
-.. _Transitioning to ceph-deploy: ../../rados/deployment/ceph-deploy-transition
-
+The ``ceph-deploy`` tool is now the preferred method of provisioning new clusters.
+For existing clusters created via ``mkcephfs`` that would like to transition to the
+new tool, there is a migration path, documented at `Transitioning to ceph-deploy`_.
Cuttlefish to Dumpling
======================
@@ -248,7 +242,7 @@ Then add a new ``ceph.repo`` repository entry with the following contents.
gpgkey=https://ceph.com/git/?p=ceph.git;a=blob_plain;f=keys/release.asc
-.. important:: Ensure you use the correct URL for your distribution. Check the
+.. note:: Ensure you use the correct URL for your distribution. Check the
http://ceph.com/rpm directory for your distribution.
.. note:: Since you can upgrade using ``ceph-deploy`` you will only need to add
@@ -387,8 +381,8 @@ To upgrade a Ceph OSD Daemon, perform the following steps:
sudo restart ceph-osd id=N
For multiple OSDs on a host, you may restart all of them with Upstart. ::
-
- sudo restart ceph-osd-all
+
+ sudo restart ceph-osd-all
For CentOS/Red Hat distributions, use::
@@ -462,6 +456,43 @@ cluster, we recommend upgrading ``ceph-common`` and client libraries
If you do not have the latest version, you may need to uninstall, auto remove
dependencies and reinstall.
+
+Transitioning to ceph-deploy
+============================
+
+If you have an existing cluster that you deployed with ``mkcephfs`` (usually
+Argonaut or Bobtail releases), you will need to make a few changes to your
+configuration to ensure that your cluster will work with ``ceph-deploy``.
+
+
+Monitor Keyring
+---------------
+
+You will need to add ``caps mon = "allow *"`` to your monitor keyring if it is
+not already in the keyring. By default, the monitor keyring is located under
+``/var/lib/ceph/mon/ceph-$id/keyring``. When you have added the ``caps``
+setting, your monitor keyring should look something like this::
+
+ [mon.]
+ key = AQBJIHhRuHCwDRAAZjBTSJcIBIoGpdOR9ToiyQ==
+ caps mon = "allow *"
+
+Adding ``caps mon = "allow *"`` will ease the transition from ``mkcephfs`` to
+``ceph-deploy`` by allowing ``ceph-create-keys`` to use the ``mon.`` keyring
+file in ``$mon_data`` and get the caps it needs.
+
+
+Use Default Paths
+-----------------
+
+Under the ``/var/lib/ceph`` directory, the ``mon`` and ``osd`` directories need
+to use the default paths.
+
+- **OSDs**: The path should be ``/var/lib/ceph/osd/ceph-$id``
+- **MON**: The path should be ``/var/lib/ceph/mon/ceph-$id``
+
+Under those directories, the keyring should be in a file named ``keyring``.
+
.. _Monitor Config Reference: ../../rados/configuration/mon-config-ref
.. _Joao's blog post: http://ceph.com/dev-notes/cephs-new-monitor-changes
.. _Ceph Authentication: ../../rados/operations/authentication/
diff --git a/doc/man/8/monmaptool.rst b/doc/man/8/monmaptool.rst
index ffc17c4c5f1..8415ba4136a 100644
--- a/doc/man/8/monmaptool.rst
+++ b/doc/man/8/monmaptool.rst
@@ -83,7 +83,7 @@ To create a new map with three monitors (for a fresh Ceph file system)::
To display the contents of the map::
- monmaptool --print onmap
+ monmaptool --print monmap
To replace one monitor::
diff --git a/doc/rados/deployment/ceph-deploy-mon.rst b/doc/rados/deployment/ceph-deploy-mon.rst
index fcd25756e64..a6123e596ca 100644
--- a/doc/rados/deployment/ceph-deploy-mon.rst
+++ b/doc/rados/deployment/ceph-deploy-mon.rst
@@ -13,10 +13,9 @@ only install one monitor per host.**
For high availability, you should run a production Ceph cluster with **AT
LEAST** three monitors. Ceph uses the Paxos algorithm, which requires a
-consensus among the majority of monitors in a quorum. You can establish a
-monitor quorum with only one monitor; however, you can not determine a majority
-with two monitors. A majority of monitors must be counted as such: 1:1, 2:3,
-3:4, 3:5, 4:6, etc.
+consensus among the majority of monitors in a quorum. With Paxos, the monitors
+cannot determine a majority for establishing a quorum with only two monitors. A
+majority of monitors must be counted as such: 1:1, 2:3, 3:4, 3:5, 4:6, etc.
See `Monitor Config Reference`_ for details on configuring monitors.
diff --git a/doc/release-notes.rst b/doc/release-notes.rst
index 2baeb8ff023..bc043fd037a 100644
--- a/doc/release-notes.rst
+++ b/doc/release-notes.rst
@@ -2,6 +2,30 @@
Release Notes
===============
+v0.67.2 "Dumpling"
+------------------
+
+This is an imporant point release for Dumpling. Most notably, it
+fixes a problem when upgrading directly from v0.56.x Bobtail to
+v0.67.x Dumpling (without stopping at v0.61.x Cuttlefish along the
+way). It also fixes a problem with the CLI parsing of the CEPH_ARGS
+environment variable, high CPU utilization by the ceph-osd daemons,
+and cleans up the radosgw shutdown sequence.
+
+Notable Changes
+~~~~~~~~~~~~~~~
+
+* objecter: resend linger requests when cluster goes from full to non-full
+* ceph: parse CEPH_ARGS environment variable
+* librados: fix small memory leak
+* osd: remove old log objects on upgrade (fixes bobtail -> dumpling jump)
+* osd: disable PGLog::check() via config option (fixes CPU burn)
+* rgw: drain requests on shutdown
+* rgw: misc memory leaks on shutdown
+
+For more detailed information, see :download:`the complete changelog <changelog/v0.67.2.txt>`.
+
+
v0.67.1 "Dumpling"
------------------
diff --git a/qa/workunits/suites/fsstress.sh b/qa/workunits/suites/fsstress.sh
index d511e375f6f..7f945172687 100755
--- a/qa/workunits/suites/fsstress.sh
+++ b/qa/workunits/suites/fsstress.sh
@@ -1,5 +1,20 @@
#!/bin/bash
+if [ ! -f /usr/lib/ltp/testcases/bin/fsstress ]
+then
+ mkdir -p /tmp/fsstress
+ cd /tmp/fsstress
+ wget -q -O /tmp/fsstress/ltp-full.tgz http://ceph.com/qa/ltp-full-20091231.tgz
+ tar xzf /tmp/fsstress/ltp-full.tgz
+ rm /tmp/fsstress/ltp-full.tgz
+ cd /tmp/fsstress/ltp-full-20091231/testcases/kernel/fs/fsstress
+ make
+ sudo mkdir -p /usr/lib/ltp/testcases/bin
+ sudo cp -avf /tmp/fsstress/ltp-full-20091231/testcases/kernel/fs/fsstress/fsstress /usr/lib/ltp/testcases/bin/fsstress
+ sudo chmod 755 /usr/lib/ltp/testcases/bin/fsstress
+ rm -Rf /tmp/fsstress
+fi
+
command="/usr/lib/ltp/testcases/bin/fsstress -d fsstress-`hostname`$$ -l 1 -n 1000 -p 10 -v"
echo "Starting fsstress $command"
diff --git a/src/Makefile.am b/src/Makefile.am
index 5a0f3472080..e895b74a0cc 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -20,7 +20,8 @@ EXTRA_DIST = \
libs3/mswin \
libs3/src \
libs3/test \
- unittest_bufferlist.sh
+ unittest_bufferlist.sh \
+ yasm-wrapper
CLEANFILES =
bin_PROGRAMS =
@@ -1475,22 +1476,31 @@ clean-local:
# libs
+CCAS = ${srcdir}/yasm-wrapper
+AM_CCASFLAGS = -f elf64
+
+# crc
+libcrc_la_SOURCES = \
+ common/sctp_crc32.c \
+ common/crc32c.cc \
+ common/crc32c_intel_baseline.c \
+ common/crc32c_intel_fast.c
+
+if WITH_GOOD_YASM_ELF64
+libcrc_la_SOURCES += common/crc32c_intel_fast_asm.S
+libcrc_la_LIBTOOLFLAGS = --tag=CC
+endif
+
+noinst_LTLIBRARIES += libcrc.la
+
+# common
libcommon_la_SOURCES = $(libcommon_files)
libcommon_la_CFLAGS= ${CRYPTO_CFLAGS} ${AM_CFLAGS}
libcommon_la_CXXFLAGS= ${AM_CXXFLAGS}
libcommon_la_LDFLAGS = -lrt
+libcommon_la_LIBADD = libcrc.la
noinst_LTLIBRARIES += libcommon.la
-libglobal_la_SOURCES = \
- global/global_context.cc \
- global/global_init.cc \
- global/pidfile.cc \
- global/signal_handler.cc
-libglobal_la_CFLAGS= ${CRYPTO_CFLAGS} ${AM_CFLAGS}
-libglobal_la_CXXFLAGS= ${AM_CXXFLAGS}
-libglobal_la_LIBADD= libcommon.la
-noinst_LTLIBRARIES += libglobal.la
-
crush_files = \
crush/builder.c \
crush/mapper.c \
@@ -1503,6 +1513,8 @@ crush_files = \
# this list ommits the ceph_ver.c file
libcommon_files = \
./ceph_ver.c \
+ arch/probe.cc \
+ arch/intel.c \
auth/AuthAuthorizeHandler.cc \
auth/AuthClientHandler.cc \
auth/AuthSessionHandler.cc \
@@ -1533,8 +1545,6 @@ libcommon_files = \
common/Timer.cc \
common/Finisher.cc \
common/environment.cc\
- common/sctp_crc32.c\
- common/crc32c-intel.c\
common/assert.cc \
common/run_cmd.cc \
common/WorkQueue.cc \
@@ -1609,6 +1619,16 @@ else
libcommon_files += perfglue/disabled_stubs.cc
endif
+# global
+libglobal_la_SOURCES = \
+ global/global_context.cc \
+ global/global_init.cc \
+ global/pidfile.cc \
+ global/signal_handler.cc
+libglobal_la_CFLAGS= ${CRYPTO_CFLAGS} ${AM_CFLAGS}
+libglobal_la_CXXFLAGS= ${AM_CXXFLAGS}
+libglobal_la_LIBADD= libcommon.la
+noinst_LTLIBRARIES += libglobal.la
libmon_a_SOURCES = \
@@ -1744,6 +1764,8 @@ python_PYTHON = pybind/rados.py \
# that autotools doesn't magically identify.
noinst_HEADERS = \
rados_sync.h \
+ arch/probe.h \
+ arch/intel.h \
auth/cephx/CephxAuthorizeHandler.h\
auth/cephx/CephxKeyServer.h\
auth/cephx/CephxProtocol.h\
@@ -1881,10 +1903,13 @@ noinst_HEADERS = \
common/ceph_crypto.h\
common/ceph_crypto_cms.h\
common/ceph_json.h\
+ common/crc32c_intel_baseline.h\
+ common/crc32c_intel_fast.h\
common/lru_map.h\
common/utf8.h\
common/mime.h\
common/pick_address.h\
+ common/sctp_crc32.h\
common/secret.h\
common/strtol.h\
common/static_assert.h\
diff --git a/src/arch/intel.c b/src/arch/intel.c
new file mode 100644
index 00000000000..0513da53c23
--- /dev/null
+++ b/src/arch/intel.c
@@ -0,0 +1,46 @@
+#include "arch/probe.h"
+
+/* flags we export */
+int ceph_arch_intel_sse42 = 0;
+
+
+/* this probably isn't specific enough for x86_64? fix me someday */
+#ifdef __LP64__
+
+/* intel cpu? */
+static void do_cpuid(unsigned int *eax, unsigned int *ebx, unsigned int *ecx,
+ unsigned int *edx)
+{
+ int id = *eax;
+
+ asm("movl %4, %%eax;"
+ "cpuid;"
+ "movl %%eax, %0;"
+ "movl %%ebx, %1;"
+ "movl %%ecx, %2;"
+ "movl %%edx, %3;"
+ : "=r" (*eax), "=r" (*ebx), "=r" (*ecx), "=r" (*edx)
+ : "r" (id)
+ : "eax", "ebx", "ecx", "edx");
+}
+
+int ceph_arch_intel_probe(void)
+{
+ /* i know how to check this on x86_64... */
+ unsigned int eax = 1, ebx, ecx, edx;
+ do_cpuid(&eax, &ebx, &ecx, &edx);
+ if ((ecx & (1 << 20)) != 0) {
+ ceph_arch_intel_sse42 = 1;
+ }
+ return 0;
+}
+
+#else // __LP64__
+
+int ceph_arch_intel_probe(void)
+{
+ /* no features */
+ return 0;
+}
+
+#endif // __LP64__
diff --git a/src/arch/intel.h b/src/arch/intel.h
new file mode 100644
index 00000000000..aefb64eaa7f
--- /dev/null
+++ b/src/arch/intel.h
@@ -0,0 +1,16 @@
+#ifndef CEPH_ARCH_INTEL_H
+#define CEPH_ARCH_INTEL_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern int ceph_arch_intel_sse42; /* true if we have sse 4.2 features */
+
+extern int ceph_arch_intel_probe(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/arch/probe.cc b/src/arch/probe.cc
new file mode 100644
index 00000000000..9f8bc9d2d0f
--- /dev/null
+++ b/src/arch/probe.cc
@@ -0,0 +1,20 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "arch/probe.h"
+
+#include "arch/intel.h"
+
+int ceph_arch_probe(void)
+{
+ if (ceph_arch_probed)
+ return 1;
+
+ ceph_arch_intel_probe();
+
+ ceph_arch_probed = 1;
+ return 1;
+}
+
+// do this once using the magic of c++.
+int ceph_arch_probed = ceph_arch_probe();
diff --git a/src/arch/probe.h b/src/arch/probe.h
new file mode 100644
index 00000000000..a789c4e864c
--- /dev/null
+++ b/src/arch/probe.h
@@ -0,0 +1,16 @@
+#ifndef CEPH_ARCH_PROBE_H
+#define CEPH_ARCH_PROBE_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern int ceph_arch_probed; /* non-zero if we've probed features */
+
+extern int ceph_arch_probe(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/common/config_opts.h b/src/common/config_opts.h
index b021651bd4d..89405121698 100644
--- a/src/common/config_opts.h
+++ b/src/common/config_opts.h
@@ -243,6 +243,7 @@ OPTION(client_readahead_max_periods, OPT_LONGLONG, 4) // as multiple of file la
OPTION(client_snapdir, OPT_STR, ".snap")
OPTION(client_mountpoint, OPT_STR, "/")
OPTION(client_notify_timeout, OPT_INT, 10) // in seconds
+OPTION(osd_client_watch_timeout, OPT_INT, 30) // in seconds
OPTION(client_caps_release_delay, OPT_INT, 5) // in seconds
OPTION(client_oc, OPT_BOOL, true)
OPTION(client_oc_size, OPT_INT, 1024*1024* 200) // MB * n
@@ -425,6 +426,11 @@ OPTION(osd_heartbeat_addr, OPT_ADDR, entity_addr_t())
OPTION(osd_heartbeat_interval, OPT_INT, 6) // (seconds) how often we ping peers
OPTION(osd_heartbeat_grace, OPT_INT, 20) // (seconds) how long before we decide a peer has failed
OPTION(osd_heartbeat_min_peers, OPT_INT, 10) // minimum number of peers
+
+// minimum number of peers tha tmust be reachable to mark ourselves
+// back up after being wrongly marked down.
+OPTION(osd_heartbeat_min_healthy_ratio, OPT_FLOAT, .33)
+
OPTION(osd_mon_heartbeat_interval, OPT_INT, 30) // (seconds) how often to ping monitor if no peers
OPTION(osd_mon_report_interval_max, OPT_INT, 120)
OPTION(osd_mon_report_interval_min, OPT_INT, 5) // pg stats, failures, up_thru, boot.
diff --git a/src/common/crc32c-intel.c b/src/common/crc32c-intel.c
deleted file mode 100644
index 1c4689e3b76..00000000000
--- a/src/common/crc32c-intel.c
+++ /dev/null
@@ -1,113 +0,0 @@
-#include <inttypes.h>
-#include <string.h>
-#include <unistd.h>
-#include <stdlib.h>
-#include <signal.h>
-#include <sys/types.h>
-#include <sys/wait.h>
-
-
-/* this probably isn't specific enough for x86_64? fix me someday */
-#ifdef __LP64__
-
-/*
- * * Based on a posting to lkml by Austin Zhang <austin.zhang@intel.com>
- * * Further based on the fio crc32c-intel.c implementation by Jens Axboe.
- * *
- * * Using hardware provided CRC32 instruction to accelerate the CRC32 disposal.
- * * CRC32C polynomial:0x1EDC6F41(BE)/0x82F63B78(LE)
- * * CRC32 is a new instruction in Intel SSE4.2, the reference can be found at:
- * * http://www.intel.com/products/processor/manuals/
- * * Intel(R) 64 and IA-32 Architectures Software Developer's Manual
- * * Volume 2A: Instruction Set Reference, A-M
- * */
-
-#if BITS_PER_LONG == 64
-#define REX_PRE "0x48, "
-#define SCALE_F 8
-#else
-#define REX_PRE
-#define SCALE_F 4
-#endif
-
-static uint32_t crc32c_intel_le_hw_byte(uint32_t crc, unsigned char const *data,
- unsigned length)
-{
- while (length--) {
- __asm__ __volatile__(
- ".byte 0xf2, 0xf, 0x38, 0xf0, 0xf1"
- :"=S"(crc)
- :"0"(crc), "c"(*data)
- );
- data++;
- }
-
- return crc;
-}
-
-uint32_t ceph_crc32c_le_intel(uint32_t crc, unsigned char const *data, unsigned length)
-{
- unsigned int iquotient = length / SCALE_F;
- unsigned int iremainder = length % SCALE_F;
-#if BITS_PER_LONG == 64
- uint64_t *ptmp = (uint64_t *) data;
-#else
- uint32_t *ptmp = (uint32_t *) data;
-#endif
-
- while (iquotient--) {
- __asm__ __volatile__(
- ".byte 0xf2, " REX_PRE "0xf, 0x38, 0xf1, 0xf1;"
- :"=S"(crc)
- :"0"(crc), "c"(*ptmp)
- );
- ptmp++;
- }
-
- if (iremainder)
- crc = crc32c_intel_le_hw_byte(crc, (unsigned char *)ptmp,
- iremainder);
-
- return crc;
-}
-
-
-static void do_cpuid(unsigned int *eax, unsigned int *ebx, unsigned int *ecx,
- unsigned int *edx)
-{
- int id = *eax;
-
- asm("movl %4, %%eax;"
- "cpuid;"
- "movl %%eax, %0;"
- "movl %%ebx, %1;"
- "movl %%ecx, %2;"
- "movl %%edx, %3;"
- : "=r" (*eax), "=r" (*ebx), "=r" (*ecx), "=r" (*edx)
- : "r" (id)
- : "eax", "ebx", "ecx", "edx");
-}
-
-int ceph_have_crc32c_intel(void)
-{
- /* i know how to check this on x86_64... */
- unsigned int eax = 1, ebx, ecx, edx;
- do_cpuid(&eax, &ebx, &ecx, &edx);
- if ((ecx & (1 << 20)) != 0)
- return 1;
- return 0;
-}
-
-#else /* __LP64__ */
-
-uint32_t ceph_crc32c_le_intel(uint32_t crc, unsigned char const *data, unsigned length)
-{
- return 0; /* this shouldn't get called! */
-}
-
-int ceph_have_crc32c_intel(void)
-{
- return 0; /* clearly not x86_64 */
-}
-
-#endif
diff --git a/src/common/crc32c.cc b/src/common/crc32c.cc
new file mode 100644
index 00000000000..e2e81a42f45
--- /dev/null
+++ b/src/common/crc32c.cc
@@ -0,0 +1,41 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "include/crc32c.h"
+
+#include "arch/probe.h"
+#include "arch/intel.h"
+#include "common/sctp_crc32.h"
+#include "common/crc32c_intel_baseline.h"
+#include "common/crc32c_intel_fast.h"
+
+/*
+ * choose best implementation based on the CPU architecture.
+ */
+ceph_crc32c_func_t ceph_choose_crc32(void)
+{
+ // make sure we've probed cpu features; this might depend on the
+ // link order of this file relative to arch/probe.cc.
+ ceph_arch_probe();
+
+ // if the CPU supports it, *and* the fast version is compiled in,
+ // use that.
+ if (ceph_arch_intel_sse42 && ceph_crc32c_intel_fast_exists()) {
+ return ceph_crc32c_intel_fast;
+ }
+
+ // default
+ return ceph_crc32c_sctp;
+}
+
+/*
+ * static global
+ *
+ * This is a bit of a no-no for shared libraries, but we don't care.
+ * It is effectively constant for the executing process as the value
+ * depends on the CPU architecture.
+ *
+ * We initialize it during program init using the magic of C++.
+ */
+ceph_crc32c_func_t ceph_crc32c_func = ceph_choose_crc32();
+
diff --git a/src/common/crc32c_intel_baseline.c b/src/common/crc32c_intel_baseline.c
new file mode 100644
index 00000000000..cfcfec624ae
--- /dev/null
+++ b/src/common/crc32c_intel_baseline.c
@@ -0,0 +1,126 @@
+/*
+ * Copyright 2012-2013 Intel Corporation All Rights Reserved.
+ * All rights reserved.
+ *
+ * http://opensource.org/licenses/BSD-3-Clause
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * * Neither the name of the Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <inttypes.h>
+#include <stdlib.h>
+
+#define MAX_ITER 8
+
+unsigned long crc32_table_iscsi_base[256] = {
+ 0x00000000, 0xF26B8303, 0xE13B70F7, 0x1350F3F4,
+ 0xC79A971F, 0x35F1141C, 0x26A1E7E8, 0xD4CA64EB,
+ 0x8AD958CF, 0x78B2DBCC, 0x6BE22838, 0x9989AB3B,
+ 0x4D43CFD0, 0xBF284CD3, 0xAC78BF27, 0x5E133C24,
+ 0x105EC76F, 0xE235446C, 0xF165B798, 0x030E349B,
+ 0xD7C45070, 0x25AFD373, 0x36FF2087, 0xC494A384,
+ 0x9A879FA0, 0x68EC1CA3, 0x7BBCEF57, 0x89D76C54,
+ 0x5D1D08BF, 0xAF768BBC, 0xBC267848, 0x4E4DFB4B,
+ 0x20BD8EDE, 0xD2D60DDD, 0xC186FE29, 0x33ED7D2A,
+ 0xE72719C1, 0x154C9AC2, 0x061C6936, 0xF477EA35,
+ 0xAA64D611, 0x580F5512, 0x4B5FA6E6, 0xB93425E5,
+ 0x6DFE410E, 0x9F95C20D, 0x8CC531F9, 0x7EAEB2FA,
+ 0x30E349B1, 0xC288CAB2, 0xD1D83946, 0x23B3BA45,
+ 0xF779DEAE, 0x05125DAD, 0x1642AE59, 0xE4292D5A,
+ 0xBA3A117E, 0x4851927D, 0x5B016189, 0xA96AE28A,
+ 0x7DA08661, 0x8FCB0562, 0x9C9BF696, 0x6EF07595,
+ 0x417B1DBC, 0xB3109EBF, 0xA0406D4B, 0x522BEE48,
+ 0x86E18AA3, 0x748A09A0, 0x67DAFA54, 0x95B17957,
+ 0xCBA24573, 0x39C9C670, 0x2A993584, 0xD8F2B687,
+ 0x0C38D26C, 0xFE53516F, 0xED03A29B, 0x1F682198,
+ 0x5125DAD3, 0xA34E59D0, 0xB01EAA24, 0x42752927,
+ 0x96BF4DCC, 0x64D4CECF, 0x77843D3B, 0x85EFBE38,
+ 0xDBFC821C, 0x2997011F, 0x3AC7F2EB, 0xC8AC71E8,
+ 0x1C661503, 0xEE0D9600, 0xFD5D65F4, 0x0F36E6F7,
+ 0x61C69362, 0x93AD1061, 0x80FDE395, 0x72966096,
+ 0xA65C047D, 0x5437877E, 0x4767748A, 0xB50CF789,
+ 0xEB1FCBAD, 0x197448AE, 0x0A24BB5A, 0xF84F3859,
+ 0x2C855CB2, 0xDEEEDFB1, 0xCDBE2C45, 0x3FD5AF46,
+ 0x7198540D, 0x83F3D70E, 0x90A324FA, 0x62C8A7F9,
+ 0xB602C312, 0x44694011, 0x5739B3E5, 0xA55230E6,
+ 0xFB410CC2, 0x092A8FC1, 0x1A7A7C35, 0xE811FF36,
+ 0x3CDB9BDD, 0xCEB018DE, 0xDDE0EB2A, 0x2F8B6829,
+ 0x82F63B78, 0x709DB87B, 0x63CD4B8F, 0x91A6C88C,
+ 0x456CAC67, 0xB7072F64, 0xA457DC90, 0x563C5F93,
+ 0x082F63B7, 0xFA44E0B4, 0xE9141340, 0x1B7F9043,
+ 0xCFB5F4A8, 0x3DDE77AB, 0x2E8E845F, 0xDCE5075C,
+ 0x92A8FC17, 0x60C37F14, 0x73938CE0, 0x81F80FE3,
+ 0x55326B08, 0xA759E80B, 0xB4091BFF, 0x466298FC,
+ 0x1871A4D8, 0xEA1A27DB, 0xF94AD42F, 0x0B21572C,
+ 0xDFEB33C7, 0x2D80B0C4, 0x3ED04330, 0xCCBBC033,
+ 0xA24BB5A6, 0x502036A5, 0x4370C551, 0xB11B4652,
+ 0x65D122B9, 0x97BAA1BA, 0x84EA524E, 0x7681D14D,
+ 0x2892ED69, 0xDAF96E6A, 0xC9A99D9E, 0x3BC21E9D,
+ 0xEF087A76, 0x1D63F975, 0x0E330A81, 0xFC588982,
+ 0xB21572C9, 0x407EF1CA, 0x532E023E, 0xA145813D,
+ 0x758FE5D6, 0x87E466D5, 0x94B49521, 0x66DF1622,
+ 0x38CC2A06, 0xCAA7A905, 0xD9F75AF1, 0x2B9CD9F2,
+ 0xFF56BD19, 0x0D3D3E1A, 0x1E6DCDEE, 0xEC064EED,
+ 0xC38D26C4, 0x31E6A5C7, 0x22B65633, 0xD0DDD530,
+ 0x0417B1DB, 0xF67C32D8, 0xE52CC12C, 0x1747422F,
+ 0x49547E0B, 0xBB3FFD08, 0xA86F0EFC, 0x5A048DFF,
+ 0x8ECEE914, 0x7CA56A17, 0x6FF599E3, 0x9D9E1AE0,
+ 0xD3D3E1AB, 0x21B862A8, 0x32E8915C, 0xC083125F,
+ 0x144976B4, 0xE622F5B7, 0xF5720643, 0x07198540,
+ 0x590AB964, 0xAB613A67, 0xB831C993, 0x4A5A4A90,
+ 0x9E902E7B, 0x6CFBAD78, 0x7FAB5E8C, 0x8DC0DD8F,
+ 0xE330A81A, 0x115B2B19, 0x020BD8ED, 0xF0605BEE,
+ 0x24AA3F05, 0xD6C1BC06, 0xC5914FF2, 0x37FACCF1,
+ 0x69E9F0D5, 0x9B8273D6, 0x88D28022, 0x7AB90321,
+ 0xAE7367CA, 0x5C18E4C9, 0x4F48173D, 0xBD23943E,
+ 0xF36E6F75, 0x0105EC76, 0x12551F82, 0xE03E9C81,
+ 0x34F4F86A, 0xC69F7B69, 0xD5CF889D, 0x27A40B9E,
+ 0x79B737BA, 0x8BDCB4B9, 0x988C474D, 0x6AE7C44E,
+ 0xBE2DA0A5, 0x4C4623A6, 0x5F16D052, 0xAD7D5351,
+};
+
+
+// iSCSI CRC baseline function
+uint32_t ceph_crc32c_intel_baseline(uint32_t crc_init2, unsigned char const *buffer, unsigned len)
+{
+ unsigned int crc_init = crc_init2;
+ unsigned int crc;
+ unsigned char* p_buf;
+
+ p_buf = (unsigned char*)buffer;
+ unsigned char const * p_end = buffer + len;
+
+ crc = crc_init;
+
+ while(p_buf < (unsigned char *) p_end ){
+ crc = (crc >> 8) ^ crc32_table_iscsi_base[(crc & 0x000000FF) ^ *p_buf++] ;
+ }
+ return crc;
+}
diff --git a/src/common/crc32c_intel_baseline.h b/src/common/crc32c_intel_baseline.h
new file mode 100644
index 00000000000..5b14ddfc07e
--- /dev/null
+++ b/src/common/crc32c_intel_baseline.h
@@ -0,0 +1,14 @@
+#ifndef CEPH_COMMON_CRC32C_INTEL_BASELINE_H
+#define CEPH_COMMON_CRC32C_INTEL_BASELINE_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern uint32_t ceph_crc32c_intel_baseline(uint32_t crc, unsigned char const *buffer, unsigned len);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/common/crc32c_intel_fast.c b/src/common/crc32c_intel_fast.c
new file mode 100644
index 00000000000..10b3c1c5c27
--- /dev/null
+++ b/src/common/crc32c_intel_fast.c
@@ -0,0 +1,30 @@
+#include <inttypes.h>
+#include "acconfig.h"
+
+extern unsigned int crc32_iscsi_00(unsigned char const *buffer, int len, unsigned int crc);
+
+#ifdef WITH_GOOD_YASM_ELF64
+
+uint32_t ceph_crc32c_intel_fast(uint32_t crc, unsigned char const *buffer, unsigned len)
+{
+ return crc32_iscsi_00(buffer, len, crc);
+}
+
+int ceph_crc32c_intel_fast_exists(void)
+{
+ return 1;
+}
+
+#else
+
+int ceph_crc32c_intel_fast_exists(void)
+{
+ return 0;
+}
+
+uint32_t ceph_crc32c_intel_fast(uint32_t crc, unsigned char const *buffer, unsigned len)
+{
+ return 0;
+}
+
+#endif
diff --git a/src/common/crc32c_intel_fast.h b/src/common/crc32c_intel_fast.h
new file mode 100644
index 00000000000..7a394a0b82c
--- /dev/null
+++ b/src/common/crc32c_intel_fast.h
@@ -0,0 +1,28 @@
+#ifndef CEPH_COMMON_CRC32C_INTEL_FAST_H
+#define CEPH_COMMON_CRC32C_INTEL_FAST_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* is the fast version compiled in */
+extern int ceph_crc32c_intel_fast_exists(void);
+
+#ifdef __LP64__
+
+extern uint32_t ceph_crc32c_intel_fast(uint32_t crc, unsigned char const *buffer, unsigned len);
+
+#else
+
+static inline uint32_t ceph_crc32c_intel_fast(uint32_t crc, unsigned char const *buffer, unsigned len)
+{
+ return 0;
+}
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/common/crc32c_intel_fast_asm.S b/src/common/crc32c_intel_fast_asm.S
new file mode 100644
index 00000000000..4ca5d65032e
--- /dev/null
+++ b/src/common/crc32c_intel_fast_asm.S
@@ -0,0 +1,664 @@
+;
+; Copyright 2012-2013 Intel Corporation All Rights Reserved.
+; All rights reserved.
+;
+; http://opensource.org/licenses/BSD-3-Clause
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following
+; conditions are met:
+;
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+;
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+;
+; * Neither the name of the Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+; FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+; COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+; INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+; (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+; HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+; STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+; ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+; OF THE POSSIBILITY OF SUCH DAMAGE.
+;
+
+; Function to compute iscsi CRC32 with table-based recombination
+; crc done "by 3" with block sizes 1920, 960, 480, 240
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+; crcB3 MACRO to implement crc32 on 3 %%bSize-byte blocks
+%macro crcB3 3
+%define %%bSize %1 ; 1/3 of buffer size
+%define %%td2 %2 ; table offset for crc0 (2/3 of buffer)
+%define %%td1 %3 ; table offset for crc1 (1/3 of buffer)
+
+%IF %%bSize=640
+ sub len, %%bSize*3
+ js %%crcB3_end ;; jump to next level if 3*blockSize > len
+%ELSE
+ cmp len, %%bSize*3
+ jnae %%crcB3_end ;; jump to next level if 3*blockSize > len
+%ENDIF
+ ;;;;;; Calculate CRC of 3 blocks of the buffer ;;;;;;
+%%crcB3_loop:
+ ;; rax = crc0 = initial crc
+ xor rbx, rbx ;; rbx = crc1 = 0;
+ xor r10, r10 ;; r10 = crc2 = 0;
+
+ %assign i 0
+ %rep %%bSize/8 - 1
+ crc32 rax, [bufptmp+i + 0*%%bSize] ;; update crc0
+ crc32 rbx, [bufptmp+i + 1*%%bSize] ;; update crc1
+ crc32 r10, [bufptmp+i + 2*%%bSize] ;; update crc2
+ %assign i (i+8)
+ %endrep
+ crc32 rax, [bufptmp+i + 0*%%bSize] ;; update crc0
+ crc32 rbx, [bufptmp+i + 1*%%bSize] ;; update crc1
+; SKIP ;crc32 r10, [bufptmp+i + 2*%%bSize] ;; update crc2
+
+ ; merge in crc0
+ movzx bufp_dw, al
+ mov r9d, [crc_init + bufp*4 + %%td2]
+ movzx bufp_dw, ah
+ shr eax, 16
+ mov r11d, [crc_init + bufp*4 + %%td2]
+ shl r11, 8
+ xor r9, r11
+
+ movzx bufp_dw, al
+ mov r11d, [crc_init + bufp*4 + %%td2]
+ movzx bufp_dw, ah
+ shl r11, 16
+ xor r9, r11
+ mov r11d, [crc_init + bufp*4 + %%td2]
+ shl r11, 24
+ xor r9, r11
+
+ ; merge in crc1
+
+ movzx bufp_dw, bl
+ mov r11d, [crc_init + bufp*4 + %%td1]
+ movzx bufp_dw, bh
+ shr ebx, 16
+ xor r9, r11
+ mov r11d, [crc_init + bufp*4 + %%td1]
+ shl r11, 8
+ xor r9, r11
+
+ movzx bufp_dw, bl
+ mov r11d, [crc_init + bufp*4 + %%td1]
+ movzx bufp_dw, bh
+ shl r11, 16
+ xor r9, r11
+ mov r11d, [crc_init + bufp*4 + %%td1]
+ shl r11, 24
+ xor r9, r11
+
+ xor r9, [bufptmp+i + 2*%%bSize]
+ crc32 r10, r9
+ mov rax, r10
+
+ add bufptmp, %%bSize*3 ;; move to next block
+ sub len, %%bSize*3
+%IF %%bSize=640
+ jns %%crcB3_loop
+%ENDIF
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%%crcB3_end:
+%IF %%bSize=640
+ add len, %%bSize*3
+%ENDIF
+ je do_return ;; return if remaining data is zero
+%endmacro
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;; ISCSI CRC 32 Implementation with crc32 Instruction
+
+;;; unsigned int crc32_iscsi_00(unsigned char * buffer, int len, unsigned int crc_init);
+;;;
+;;; *buf = rcx
+;;; len = rdx
+;;; crc_init = r8
+;;;
+
+global crc32_iscsi_00:function
+crc32_iscsi_00:
+
+%ifidn __OUTPUT_FORMAT__, elf64
+%define bufp rdi
+%define bufp_dw edi
+%define bufp_w di
+%define bufp_b dil
+%define bufptmp rcx
+%define block_0 rcx
+%define block_1 r8
+%define block_2 r11
+%define len rsi
+%define len_dw esi
+%define len_w si
+%define len_b sil
+%define crc_init rdx
+%define crc_init_dw edx
+%else
+%define bufp rcx
+%define bufp_dw ecx
+%define bufp_w cx
+%define bufp_b cl
+%define bufptmp rdi
+%define block_0 rdi
+%define block_1 rsi
+%define block_2 r11
+%define len rdx
+%define len_dw edx
+%define len_w dx
+%define len_b dl
+%define crc_init r8
+%define crc_init_dw r8d
+%endif
+
+
+ push rdi
+ push rbx
+
+ mov rax, crc_init ;; rax = crc_init;
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; 1) ALIGN: ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+ mov bufptmp, bufp ;; rdi = *buf
+ neg bufp
+ and bufp, 7 ;; calculate the unalignment
+ ;; amount of the address
+ je proc_block ;; Skip if aligned
+
+ cmp len, 8
+ jb less_than_8
+
+ ;;;; Calculate CRC of unaligned bytes of the buffer (if any) ;;;;
+ mov rbx, [bufptmp] ;; load a quadword from the buffer
+ add bufptmp, bufp ;; align buffer pointer for
+ ;; quadword processing
+ sub len, bufp ;; update buffer length
+align_loop:
+ crc32 eax, bl ;; compute crc32 of 1-byte
+ shr rbx, 8 ;; get next byte
+ dec bufp
+ jne align_loop
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; 2) BLOCK LEVEL: ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+proc_block:
+ cmp len, 240
+ jb bit8
+
+ lea crc_init, [mul_table_72 wrt rip] ;; load table base address
+
+ crcB3 640, 0x1000, 0x0c00 ; 640*3 = 1920 (Tables 1280, 640)
+ crcB3 320, 0x0c00, 0x0800 ; 320*3 = 960 (Tables 640, 320)
+ crcB3 160, 0x0800, 0x0400 ; 160*3 = 480 (Tables 320, 160)
+ crcB3 80, 0x0400, 0x0000 ; 80*3 = 240 (Tables 160, 80)
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;4) LESS THAN 256-bytes REMAIN AT THIS POINT (8-bits of rdx are full)
+
+bit8:
+ shl len_b, 1 ;; shift-out MSB (bit-7)
+ jnc bit7 ;; jump to bit-6 if bit-7 == 0
+ %assign i 0
+ %rep 16
+ crc32 rax, [bufptmp+i] ;; compute crc32 of 8-byte data
+ %assign i (i+8)
+ %endrep
+ je do_return ;; return if remaining data is zero
+ add bufptmp, 128 ;; buf +=64; (next 64 bytes)
+
+bit7:
+ shl len_b, 1 ;; shift-out MSB (bit-7)
+ jnc bit6 ;; jump to bit-6 if bit-7 == 0
+ %assign i 0
+ %rep 8
+ crc32 rax, [bufptmp+i] ;; compute crc32 of 8-byte data
+ %assign i (i+8)
+ %endrep
+ je do_return ;; return if remaining data is zero
+ add bufptmp, 64 ;; buf +=64; (next 64 bytes)
+bit6:
+ shl len_b, 1 ;; shift-out MSB (bit-6)
+ jnc bit5 ;; jump to bit-5 if bit-6 == 0
+ %assign i 0
+ %rep 4
+ crc32 rax, [bufptmp+i] ;; compute crc32 of 8-byte data
+ %assign i (i+8)
+ %endrep
+ je do_return ;; return if remaining data is zero
+ add bufptmp, 32 ;; buf +=32; (next 32 bytes)
+bit5:
+ shl len_b, 1 ;; shift-out MSB (bit-5)
+ jnc bit4 ;; jump to bit-4 if bit-5 == 0
+ %assign i 0
+ %rep 2
+ crc32 rax, [bufptmp+i] ;; compute crc32 of 8-byte data
+ %assign i (i+8)
+ %endrep
+ je do_return ;; return if remaining data is zero
+ add bufptmp, 16 ;; buf +=16; (next 16 bytes)
+bit4:
+ shl len_b, 1 ;; shift-out MSB (bit-4)
+ jnc bit3 ;; jump to bit-3 if bit-4 == 0
+ crc32 rax, [bufptmp] ;; compute crc32 of 8-byte data
+ je do_return ;; return if remaining data is zero
+ add bufptmp, 8 ;; buf +=8; (next 8 bytes)
+bit3:
+ mov rbx, [bufptmp] ;; load a 8-bytes from the buffer:
+ shl len_b, 1 ;; shift-out MSB (bit-3)
+ jnc bit2 ;; jump to bit-2 if bit-3 == 0
+ crc32 eax, ebx ;; compute crc32 of 4-byte data
+ je do_return ;; return if remaining data is zero
+ shr rbx, 32 ;; get next 3 bytes
+bit2:
+ shl len_b, 1 ;; shift-out MSB (bit-2)
+ jnc bit1 ;; jump to bit-1 if bit-2 == 0
+ crc32 eax, bx ;; compute crc32 of 2-byte data
+ je do_return ;; return if remaining data is zero
+ shr rbx, 16 ;; next byte
+bit1:
+ test len_b,len_b
+ je do_return
+ crc32 eax, bl ;; compute crc32 of 1-byte data
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+do_return:
+
+ pop rbx
+ pop rdi
+ ret
+
+less_than_8:
+ test len,4
+ jz less_than_4
+ crc32 eax, dword[bufptmp]
+ add bufptmp,4
+less_than_4:
+ test len,2
+ jz less_than_2
+ crc32 eax, word[bufptmp]
+ add bufptmp,2
+less_than_2:
+ test len,1
+ jz do_return
+ crc32 rax, byte[bufptmp]
+ pop rbx
+ pop bufptmp
+ ret
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;; global mul_table_72, mul_table_152, mul_table_312, mul_table_632, mul_table_1272
+
+section .data
+align 8
+mul_table_72:
+DD 0x00000000,0x39d3b296,0x73a7652c,0x4a74d7ba
+DD 0xe74eca58,0xde9d78ce,0x94e9af74,0xad3a1de2
+DD 0xcb71e241,0xf2a250d7,0xb8d6876d,0x810535fb
+DD 0x2c3f2819,0x15ec9a8f,0x5f984d35,0x664bffa3
+DD 0x930fb273,0xaadc00e5,0xe0a8d75f,0xd97b65c9
+DD 0x7441782b,0x4d92cabd,0x07e61d07,0x3e35af91
+DD 0x587e5032,0x61ade2a4,0x2bd9351e,0x120a8788
+DD 0xbf309a6a,0x86e328fc,0xcc97ff46,0xf5444dd0
+DD 0x23f31217,0x1a20a081,0x5054773b,0x6987c5ad
+DD 0xc4bdd84f,0xfd6e6ad9,0xb71abd63,0x8ec90ff5
+DD 0xe882f056,0xd15142c0,0x9b25957a,0xa2f627ec
+DD 0x0fcc3a0e,0x361f8898,0x7c6b5f22,0x45b8edb4
+DD 0xb0fca064,0x892f12f2,0xc35bc548,0xfa8877de
+DD 0x57b26a3c,0x6e61d8aa,0x24150f10,0x1dc6bd86
+DD 0x7b8d4225,0x425ef0b3,0x082a2709,0x31f9959f
+DD 0x9cc3887d,0xa5103aeb,0xef64ed51,0xd6b75fc7
+DD 0x47e6242e,0x7e3596b8,0x34414102,0x0d92f394
+DD 0xa0a8ee76,0x997b5ce0,0xd30f8b5a,0xeadc39cc
+DD 0x8c97c66f,0xb54474f9,0xff30a343,0xc6e311d5
+DD 0x6bd90c37,0x520abea1,0x187e691b,0x21addb8d
+DD 0xd4e9965d,0xed3a24cb,0xa74ef371,0x9e9d41e7
+DD 0x33a75c05,0x0a74ee93,0x40003929,0x79d38bbf
+DD 0x1f98741c,0x264bc68a,0x6c3f1130,0x55eca3a6
+DD 0xf8d6be44,0xc1050cd2,0x8b71db68,0xb2a269fe
+DD 0x64153639,0x5dc684af,0x17b25315,0x2e61e183
+DD 0x835bfc61,0xba884ef7,0xf0fc994d,0xc92f2bdb
+DD 0xaf64d478,0x96b766ee,0xdcc3b154,0xe51003c2
+DD 0x482a1e20,0x71f9acb6,0x3b8d7b0c,0x025ec99a
+DD 0xf71a844a,0xcec936dc,0x84bde166,0xbd6e53f0
+DD 0x10544e12,0x2987fc84,0x63f32b3e,0x5a2099a8
+DD 0x3c6b660b,0x05b8d49d,0x4fcc0327,0x761fb1b1
+DD 0xdb25ac53,0xe2f61ec5,0xa882c97f,0x91517be9
+DD 0x8fcc485c,0xb61ffaca,0xfc6b2d70,0xc5b89fe6
+DD 0x68828204,0x51513092,0x1b25e728,0x22f655be
+DD 0x44bdaa1d,0x7d6e188b,0x371acf31,0x0ec97da7
+DD 0xa3f36045,0x9a20d2d3,0xd0540569,0xe987b7ff
+DD 0x1cc3fa2f,0x251048b9,0x6f649f03,0x56b72d95
+DD 0xfb8d3077,0xc25e82e1,0x882a555b,0xb1f9e7cd
+DD 0xd7b2186e,0xee61aaf8,0xa4157d42,0x9dc6cfd4
+DD 0x30fcd236,0x092f60a0,0x435bb71a,0x7a88058c
+DD 0xac3f5a4b,0x95ece8dd,0xdf983f67,0xe64b8df1
+DD 0x4b719013,0x72a22285,0x38d6f53f,0x010547a9
+DD 0x674eb80a,0x5e9d0a9c,0x14e9dd26,0x2d3a6fb0
+DD 0x80007252,0xb9d3c0c4,0xf3a7177e,0xca74a5e8
+DD 0x3f30e838,0x06e35aae,0x4c978d14,0x75443f82
+DD 0xd87e2260,0xe1ad90f6,0xabd9474c,0x920af5da
+DD 0xf4410a79,0xcd92b8ef,0x87e66f55,0xbe35ddc3
+DD 0x130fc021,0x2adc72b7,0x60a8a50d,0x597b179b
+DD 0xc82a6c72,0xf1f9dee4,0xbb8d095e,0x825ebbc8
+DD 0x2f64a62a,0x16b714bc,0x5cc3c306,0x65107190
+DD 0x035b8e33,0x3a883ca5,0x70fceb1f,0x492f5989
+DD 0xe415446b,0xddc6f6fd,0x97b22147,0xae6193d1
+DD 0x5b25de01,0x62f66c97,0x2882bb2d,0x115109bb
+DD 0xbc6b1459,0x85b8a6cf,0xcfcc7175,0xf61fc3e3
+DD 0x90543c40,0xa9878ed6,0xe3f3596c,0xda20ebfa
+DD 0x771af618,0x4ec9448e,0x04bd9334,0x3d6e21a2
+DD 0xebd97e65,0xd20accf3,0x987e1b49,0xa1ada9df
+DD 0x0c97b43d,0x354406ab,0x7f30d111,0x46e36387
+DD 0x20a89c24,0x197b2eb2,0x530ff908,0x6adc4b9e
+DD 0xc7e6567c,0xfe35e4ea,0xb4413350,0x8d9281c6
+DD 0x78d6cc16,0x41057e80,0x0b71a93a,0x32a21bac
+DD 0x9f98064e,0xa64bb4d8,0xec3f6362,0xd5ecd1f4
+DD 0xb3a72e57,0x8a749cc1,0xc0004b7b,0xf9d3f9ed
+DD 0x54e9e40f,0x6d3a5699,0x274e8123,0x1e9d33b5
+
+mul_table_152:
+DD 0x00000000,0x878a92a7,0x0af953bf,0x8d73c118
+DD 0x15f2a77e,0x927835d9,0x1f0bf4c1,0x98816666
+DD 0x2be54efc,0xac6fdc5b,0x211c1d43,0xa6968fe4
+DD 0x3e17e982,0xb99d7b25,0x34eeba3d,0xb364289a
+DD 0x57ca9df8,0xd0400f5f,0x5d33ce47,0xdab95ce0
+DD 0x42383a86,0xc5b2a821,0x48c16939,0xcf4bfb9e
+DD 0x7c2fd304,0xfba541a3,0x76d680bb,0xf15c121c
+DD 0x69dd747a,0xee57e6dd,0x632427c5,0xe4aeb562
+DD 0xaf953bf0,0x281fa957,0xa56c684f,0x22e6fae8
+DD 0xba679c8e,0x3ded0e29,0xb09ecf31,0x37145d96
+DD 0x8470750c,0x03fae7ab,0x8e8926b3,0x0903b414
+DD 0x9182d272,0x160840d5,0x9b7b81cd,0x1cf1136a
+DD 0xf85fa608,0x7fd534af,0xf2a6f5b7,0x752c6710
+DD 0xedad0176,0x6a2793d1,0xe75452c9,0x60dec06e
+DD 0xd3bae8f4,0x54307a53,0xd943bb4b,0x5ec929ec
+DD 0xc6484f8a,0x41c2dd2d,0xccb11c35,0x4b3b8e92
+DD 0x5ac60111,0xdd4c93b6,0x503f52ae,0xd7b5c009
+DD 0x4f34a66f,0xc8be34c8,0x45cdf5d0,0xc2476777
+DD 0x71234fed,0xf6a9dd4a,0x7bda1c52,0xfc508ef5
+DD 0x64d1e893,0xe35b7a34,0x6e28bb2c,0xe9a2298b
+DD 0x0d0c9ce9,0x8a860e4e,0x07f5cf56,0x807f5df1
+DD 0x18fe3b97,0x9f74a930,0x12076828,0x958dfa8f
+DD 0x26e9d215,0xa16340b2,0x2c1081aa,0xab9a130d
+DD 0x331b756b,0xb491e7cc,0x39e226d4,0xbe68b473
+DD 0xf5533ae1,0x72d9a846,0xffaa695e,0x7820fbf9
+DD 0xe0a19d9f,0x672b0f38,0xea58ce20,0x6dd25c87
+DD 0xdeb6741d,0x593ce6ba,0xd44f27a2,0x53c5b505
+DD 0xcb44d363,0x4cce41c4,0xc1bd80dc,0x4637127b
+DD 0xa299a719,0x251335be,0xa860f4a6,0x2fea6601
+DD 0xb76b0067,0x30e192c0,0xbd9253d8,0x3a18c17f
+DD 0x897ce9e5,0x0ef67b42,0x8385ba5a,0x040f28fd
+DD 0x9c8e4e9b,0x1b04dc3c,0x96771d24,0x11fd8f83
+DD 0xb58c0222,0x32069085,0xbf75519d,0x38ffc33a
+DD 0xa07ea55c,0x27f437fb,0xaa87f6e3,0x2d0d6444
+DD 0x9e694cde,0x19e3de79,0x94901f61,0x131a8dc6
+DD 0x8b9beba0,0x0c117907,0x8162b81f,0x06e82ab8
+DD 0xe2469fda,0x65cc0d7d,0xe8bfcc65,0x6f355ec2
+DD 0xf7b438a4,0x703eaa03,0xfd4d6b1b,0x7ac7f9bc
+DD 0xc9a3d126,0x4e294381,0xc35a8299,0x44d0103e
+DD 0xdc517658,0x5bdbe4ff,0xd6a825e7,0x5122b740
+DD 0x1a1939d2,0x9d93ab75,0x10e06a6d,0x976af8ca
+DD 0x0feb9eac,0x88610c0b,0x0512cd13,0x82985fb4
+DD 0x31fc772e,0xb676e589,0x3b052491,0xbc8fb636
+DD 0x240ed050,0xa38442f7,0x2ef783ef,0xa97d1148
+DD 0x4dd3a42a,0xca59368d,0x472af795,0xc0a06532
+DD 0x58210354,0xdfab91f3,0x52d850eb,0xd552c24c
+DD 0x6636ead6,0xe1bc7871,0x6ccfb969,0xeb452bce
+DD 0x73c44da8,0xf44edf0f,0x793d1e17,0xfeb78cb0
+DD 0xef4a0333,0x68c09194,0xe5b3508c,0x6239c22b
+DD 0xfab8a44d,0x7d3236ea,0xf041f7f2,0x77cb6555
+DD 0xc4af4dcf,0x4325df68,0xce561e70,0x49dc8cd7
+DD 0xd15deab1,0x56d77816,0xdba4b90e,0x5c2e2ba9
+DD 0xb8809ecb,0x3f0a0c6c,0xb279cd74,0x35f35fd3
+DD 0xad7239b5,0x2af8ab12,0xa78b6a0a,0x2001f8ad
+DD 0x9365d037,0x14ef4290,0x999c8388,0x1e16112f
+DD 0x86977749,0x011de5ee,0x8c6e24f6,0x0be4b651
+DD 0x40df38c3,0xc755aa64,0x4a266b7c,0xcdacf9db
+DD 0x552d9fbd,0xd2a70d1a,0x5fd4cc02,0xd85e5ea5
+DD 0x6b3a763f,0xecb0e498,0x61c32580,0xe649b727
+DD 0x7ec8d141,0xf94243e6,0x743182fe,0xf3bb1059
+DD 0x1715a53b,0x909f379c,0x1decf684,0x9a666423
+DD 0x02e70245,0x856d90e2,0x081e51fa,0x8f94c35d
+DD 0x3cf0ebc7,0xbb7a7960,0x3609b878,0xb1832adf
+DD 0x29024cb9,0xae88de1e,0x23fb1f06,0xa4718da1
+
+mul_table_312:
+DD 0x00000000,0xbac2fd7b,0x70698c07,0xcaab717c
+DD 0xe0d3180e,0x5a11e575,0x90ba9409,0x2a786972
+DD 0xc44a46ed,0x7e88bb96,0xb423caea,0x0ee13791
+DD 0x24995ee3,0x9e5ba398,0x54f0d2e4,0xee322f9f
+DD 0x8d78fb2b,0x37ba0650,0xfd11772c,0x47d38a57
+DD 0x6dabe325,0xd7691e5e,0x1dc26f22,0xa7009259
+DD 0x4932bdc6,0xf3f040bd,0x395b31c1,0x8399ccba
+DD 0xa9e1a5c8,0x132358b3,0xd98829cf,0x634ad4b4
+DD 0x1f1d80a7,0xa5df7ddc,0x6f740ca0,0xd5b6f1db
+DD 0xffce98a9,0x450c65d2,0x8fa714ae,0x3565e9d5
+DD 0xdb57c64a,0x61953b31,0xab3e4a4d,0x11fcb736
+DD 0x3b84de44,0x8146233f,0x4bed5243,0xf12faf38
+DD 0x92657b8c,0x28a786f7,0xe20cf78b,0x58ce0af0
+DD 0x72b66382,0xc8749ef9,0x02dfef85,0xb81d12fe
+DD 0x562f3d61,0xecedc01a,0x2646b166,0x9c844c1d
+DD 0xb6fc256f,0x0c3ed814,0xc695a968,0x7c575413
+DD 0x3e3b014e,0x84f9fc35,0x4e528d49,0xf4907032
+DD 0xdee81940,0x642ae43b,0xae819547,0x1443683c
+DD 0xfa7147a3,0x40b3bad8,0x8a18cba4,0x30da36df
+DD 0x1aa25fad,0xa060a2d6,0x6acbd3aa,0xd0092ed1
+DD 0xb343fa65,0x0981071e,0xc32a7662,0x79e88b19
+DD 0x5390e26b,0xe9521f10,0x23f96e6c,0x993b9317
+DD 0x7709bc88,0xcdcb41f3,0x0760308f,0xbda2cdf4
+DD 0x97daa486,0x2d1859fd,0xe7b32881,0x5d71d5fa
+DD 0x212681e9,0x9be47c92,0x514f0dee,0xeb8df095
+DD 0xc1f599e7,0x7b37649c,0xb19c15e0,0x0b5ee89b
+DD 0xe56cc704,0x5fae3a7f,0x95054b03,0x2fc7b678
+DD 0x05bfdf0a,0xbf7d2271,0x75d6530d,0xcf14ae76
+DD 0xac5e7ac2,0x169c87b9,0xdc37f6c5,0x66f50bbe
+DD 0x4c8d62cc,0xf64f9fb7,0x3ce4eecb,0x862613b0
+DD 0x68143c2f,0xd2d6c154,0x187db028,0xa2bf4d53
+DD 0x88c72421,0x3205d95a,0xf8aea826,0x426c555d
+DD 0x7c76029c,0xc6b4ffe7,0x0c1f8e9b,0xb6dd73e0
+DD 0x9ca51a92,0x2667e7e9,0xeccc9695,0x560e6bee
+DD 0xb83c4471,0x02feb90a,0xc855c876,0x7297350d
+DD 0x58ef5c7f,0xe22da104,0x2886d078,0x92442d03
+DD 0xf10ef9b7,0x4bcc04cc,0x816775b0,0x3ba588cb
+DD 0x11dde1b9,0xab1f1cc2,0x61b46dbe,0xdb7690c5
+DD 0x3544bf5a,0x8f864221,0x452d335d,0xffefce26
+DD 0xd597a754,0x6f555a2f,0xa5fe2b53,0x1f3cd628
+DD 0x636b823b,0xd9a97f40,0x13020e3c,0xa9c0f347
+DD 0x83b89a35,0x397a674e,0xf3d11632,0x4913eb49
+DD 0xa721c4d6,0x1de339ad,0xd74848d1,0x6d8ab5aa
+DD 0x47f2dcd8,0xfd3021a3,0x379b50df,0x8d59ada4
+DD 0xee137910,0x54d1846b,0x9e7af517,0x24b8086c
+DD 0x0ec0611e,0xb4029c65,0x7ea9ed19,0xc46b1062
+DD 0x2a593ffd,0x909bc286,0x5a30b3fa,0xe0f24e81
+DD 0xca8a27f3,0x7048da88,0xbae3abf4,0x0021568f
+DD 0x424d03d2,0xf88ffea9,0x32248fd5,0x88e672ae
+DD 0xa29e1bdc,0x185ce6a7,0xd2f797db,0x68356aa0
+DD 0x8607453f,0x3cc5b844,0xf66ec938,0x4cac3443
+DD 0x66d45d31,0xdc16a04a,0x16bdd136,0xac7f2c4d
+DD 0xcf35f8f9,0x75f70582,0xbf5c74fe,0x059e8985
+DD 0x2fe6e0f7,0x95241d8c,0x5f8f6cf0,0xe54d918b
+DD 0x0b7fbe14,0xb1bd436f,0x7b163213,0xc1d4cf68
+DD 0xebaca61a,0x516e5b61,0x9bc52a1d,0x2107d766
+DD 0x5d508375,0xe7927e0e,0x2d390f72,0x97fbf209
+DD 0xbd839b7b,0x07416600,0xcdea177c,0x7728ea07
+DD 0x991ac598,0x23d838e3,0xe973499f,0x53b1b4e4
+DD 0x79c9dd96,0xc30b20ed,0x09a05191,0xb362acea
+DD 0xd028785e,0x6aea8525,0xa041f459,0x1a830922
+DD 0x30fb6050,0x8a399d2b,0x4092ec57,0xfa50112c
+DD 0x14623eb3,0xaea0c3c8,0x640bb2b4,0xdec94fcf
+DD 0xf4b126bd,0x4e73dbc6,0x84d8aaba,0x3e1a57c1
+
+mul_table_632:
+DD 0x00000000,0x6b749fb2,0xd6e93f64,0xbd9da0d6
+DD 0xa83e0839,0xc34a978b,0x7ed7375d,0x15a3a8ef
+DD 0x55906683,0x3ee4f931,0x837959e7,0xe80dc655
+DD 0xfdae6eba,0x96daf108,0x2b4751de,0x4033ce6c
+DD 0xab20cd06,0xc05452b4,0x7dc9f262,0x16bd6dd0
+DD 0x031ec53f,0x686a5a8d,0xd5f7fa5b,0xbe8365e9
+DD 0xfeb0ab85,0x95c43437,0x285994e1,0x432d0b53
+DD 0x568ea3bc,0x3dfa3c0e,0x80679cd8,0xeb13036a
+DD 0x53adecfd,0x38d9734f,0x8544d399,0xee304c2b
+DD 0xfb93e4c4,0x90e77b76,0x2d7adba0,0x460e4412
+DD 0x063d8a7e,0x6d4915cc,0xd0d4b51a,0xbba02aa8
+DD 0xae038247,0xc5771df5,0x78eabd23,0x139e2291
+DD 0xf88d21fb,0x93f9be49,0x2e641e9f,0x4510812d
+DD 0x50b329c2,0x3bc7b670,0x865a16a6,0xed2e8914
+DD 0xad1d4778,0xc669d8ca,0x7bf4781c,0x1080e7ae
+DD 0x05234f41,0x6e57d0f3,0xd3ca7025,0xb8beef97
+DD 0xa75bd9fa,0xcc2f4648,0x71b2e69e,0x1ac6792c
+DD 0x0f65d1c3,0x64114e71,0xd98ceea7,0xb2f87115
+DD 0xf2cbbf79,0x99bf20cb,0x2422801d,0x4f561faf
+DD 0x5af5b740,0x318128f2,0x8c1c8824,0xe7681796
+DD 0x0c7b14fc,0x670f8b4e,0xda922b98,0xb1e6b42a
+DD 0xa4451cc5,0xcf318377,0x72ac23a1,0x19d8bc13
+DD 0x59eb727f,0x329fedcd,0x8f024d1b,0xe476d2a9
+DD 0xf1d57a46,0x9aa1e5f4,0x273c4522,0x4c48da90
+DD 0xf4f63507,0x9f82aab5,0x221f0a63,0x496b95d1
+DD 0x5cc83d3e,0x37bca28c,0x8a21025a,0xe1559de8
+DD 0xa1665384,0xca12cc36,0x778f6ce0,0x1cfbf352
+DD 0x09585bbd,0x622cc40f,0xdfb164d9,0xb4c5fb6b
+DD 0x5fd6f801,0x34a267b3,0x893fc765,0xe24b58d7
+DD 0xf7e8f038,0x9c9c6f8a,0x2101cf5c,0x4a7550ee
+DD 0x0a469e82,0x61320130,0xdcafa1e6,0xb7db3e54
+DD 0xa27896bb,0xc90c0909,0x7491a9df,0x1fe5366d
+DD 0x4b5bc505,0x202f5ab7,0x9db2fa61,0xf6c665d3
+DD 0xe365cd3c,0x8811528e,0x358cf258,0x5ef86dea
+DD 0x1ecba386,0x75bf3c34,0xc8229ce2,0xa3560350
+DD 0xb6f5abbf,0xdd81340d,0x601c94db,0x0b680b69
+DD 0xe07b0803,0x8b0f97b1,0x36923767,0x5de6a8d5
+DD 0x4845003a,0x23319f88,0x9eac3f5e,0xf5d8a0ec
+DD 0xb5eb6e80,0xde9ff132,0x630251e4,0x0876ce56
+DD 0x1dd566b9,0x76a1f90b,0xcb3c59dd,0xa048c66f
+DD 0x18f629f8,0x7382b64a,0xce1f169c,0xa56b892e
+DD 0xb0c821c1,0xdbbcbe73,0x66211ea5,0x0d558117
+DD 0x4d664f7b,0x2612d0c9,0x9b8f701f,0xf0fbefad
+DD 0xe5584742,0x8e2cd8f0,0x33b17826,0x58c5e794
+DD 0xb3d6e4fe,0xd8a27b4c,0x653fdb9a,0x0e4b4428
+DD 0x1be8ecc7,0x709c7375,0xcd01d3a3,0xa6754c11
+DD 0xe646827d,0x8d321dcf,0x30afbd19,0x5bdb22ab
+DD 0x4e788a44,0x250c15f6,0x9891b520,0xf3e52a92
+DD 0xec001cff,0x8774834d,0x3ae9239b,0x519dbc29
+DD 0x443e14c6,0x2f4a8b74,0x92d72ba2,0xf9a3b410
+DD 0xb9907a7c,0xd2e4e5ce,0x6f794518,0x040ddaaa
+DD 0x11ae7245,0x7adaedf7,0xc7474d21,0xac33d293
+DD 0x4720d1f9,0x2c544e4b,0x91c9ee9d,0xfabd712f
+DD 0xef1ed9c0,0x846a4672,0x39f7e6a4,0x52837916
+DD 0x12b0b77a,0x79c428c8,0xc459881e,0xaf2d17ac
+DD 0xba8ebf43,0xd1fa20f1,0x6c678027,0x07131f95
+DD 0xbfadf002,0xd4d96fb0,0x6944cf66,0x023050d4
+DD 0x1793f83b,0x7ce76789,0xc17ac75f,0xaa0e58ed
+DD 0xea3d9681,0x81490933,0x3cd4a9e5,0x57a03657
+DD 0x42039eb8,0x2977010a,0x94eaa1dc,0xff9e3e6e
+DD 0x148d3d04,0x7ff9a2b6,0xc2640260,0xa9109dd2
+DD 0xbcb3353d,0xd7c7aa8f,0x6a5a0a59,0x012e95eb
+DD 0x411d5b87,0x2a69c435,0x97f464e3,0xfc80fb51
+DD 0xe92353be,0x8257cc0c,0x3fca6cda,0x54bef368
+
+mul_table_1272:
+DD 0x00000000,0xdd66cbbb,0xbf21e187,0x62472a3c
+DD 0x7bafb5ff,0xa6c97e44,0xc48e5478,0x19e89fc3
+DD 0xf75f6bfe,0x2a39a045,0x487e8a79,0x951841c2
+DD 0x8cf0de01,0x519615ba,0x33d13f86,0xeeb7f43d
+DD 0xeb52a10d,0x36346ab6,0x5473408a,0x89158b31
+DD 0x90fd14f2,0x4d9bdf49,0x2fdcf575,0xf2ba3ece
+DD 0x1c0dcaf3,0xc16b0148,0xa32c2b74,0x7e4ae0cf
+DD 0x67a27f0c,0xbac4b4b7,0xd8839e8b,0x05e55530
+DD 0xd34934eb,0x0e2fff50,0x6c68d56c,0xb10e1ed7
+DD 0xa8e68114,0x75804aaf,0x17c76093,0xcaa1ab28
+DD 0x24165f15,0xf97094ae,0x9b37be92,0x46517529
+DD 0x5fb9eaea,0x82df2151,0xe0980b6d,0x3dfec0d6
+DD 0x381b95e6,0xe57d5e5d,0x873a7461,0x5a5cbfda
+DD 0x43b42019,0x9ed2eba2,0xfc95c19e,0x21f30a25
+DD 0xcf44fe18,0x122235a3,0x70651f9f,0xad03d424
+DD 0xb4eb4be7,0x698d805c,0x0bcaaa60,0xd6ac61db
+DD 0xa37e1f27,0x7e18d49c,0x1c5ffea0,0xc139351b
+DD 0xd8d1aad8,0x05b76163,0x67f04b5f,0xba9680e4
+DD 0x542174d9,0x8947bf62,0xeb00955e,0x36665ee5
+DD 0x2f8ec126,0xf2e80a9d,0x90af20a1,0x4dc9eb1a
+DD 0x482cbe2a,0x954a7591,0xf70d5fad,0x2a6b9416
+DD 0x33830bd5,0xeee5c06e,0x8ca2ea52,0x51c421e9
+DD 0xbf73d5d4,0x62151e6f,0x00523453,0xdd34ffe8
+DD 0xc4dc602b,0x19baab90,0x7bfd81ac,0xa69b4a17
+DD 0x70372bcc,0xad51e077,0xcf16ca4b,0x127001f0
+DD 0x0b989e33,0xd6fe5588,0xb4b97fb4,0x69dfb40f
+DD 0x87684032,0x5a0e8b89,0x3849a1b5,0xe52f6a0e
+DD 0xfcc7f5cd,0x21a13e76,0x43e6144a,0x9e80dff1
+DD 0x9b658ac1,0x4603417a,0x24446b46,0xf922a0fd
+DD 0xe0ca3f3e,0x3dacf485,0x5febdeb9,0x828d1502
+DD 0x6c3ae13f,0xb15c2a84,0xd31b00b8,0x0e7dcb03
+DD 0x179554c0,0xcaf39f7b,0xa8b4b547,0x75d27efc
+DD 0x431048bf,0x9e768304,0xfc31a938,0x21576283
+DD 0x38bffd40,0xe5d936fb,0x879e1cc7,0x5af8d77c
+DD 0xb44f2341,0x6929e8fa,0x0b6ec2c6,0xd608097d
+DD 0xcfe096be,0x12865d05,0x70c17739,0xada7bc82
+DD 0xa842e9b2,0x75242209,0x17630835,0xca05c38e
+DD 0xd3ed5c4d,0x0e8b97f6,0x6cccbdca,0xb1aa7671
+DD 0x5f1d824c,0x827b49f7,0xe03c63cb,0x3d5aa870
+DD 0x24b237b3,0xf9d4fc08,0x9b93d634,0x46f51d8f
+DD 0x90597c54,0x4d3fb7ef,0x2f789dd3,0xf21e5668
+DD 0xebf6c9ab,0x36900210,0x54d7282c,0x89b1e397
+DD 0x670617aa,0xba60dc11,0xd827f62d,0x05413d96
+DD 0x1ca9a255,0xc1cf69ee,0xa38843d2,0x7eee8869
+DD 0x7b0bdd59,0xa66d16e2,0xc42a3cde,0x194cf765
+DD 0x00a468a6,0xddc2a31d,0xbf858921,0x62e3429a
+DD 0x8c54b6a7,0x51327d1c,0x33755720,0xee139c9b
+DD 0xf7fb0358,0x2a9dc8e3,0x48dae2df,0x95bc2964
+DD 0xe06e5798,0x3d089c23,0x5f4fb61f,0x82297da4
+DD 0x9bc1e267,0x46a729dc,0x24e003e0,0xf986c85b
+DD 0x17313c66,0xca57f7dd,0xa810dde1,0x7576165a
+DD 0x6c9e8999,0xb1f84222,0xd3bf681e,0x0ed9a3a5
+DD 0x0b3cf695,0xd65a3d2e,0xb41d1712,0x697bdca9
+DD 0x7093436a,0xadf588d1,0xcfb2a2ed,0x12d46956
+DD 0xfc639d6b,0x210556d0,0x43427cec,0x9e24b757
+DD 0x87cc2894,0x5aaae32f,0x38edc913,0xe58b02a8
+DD 0x33276373,0xee41a8c8,0x8c0682f4,0x5160494f
+DD 0x4888d68c,0x95ee1d37,0xf7a9370b,0x2acffcb0
+DD 0xc478088d,0x191ec336,0x7b59e90a,0xa63f22b1
+DD 0xbfd7bd72,0x62b176c9,0x00f65cf5,0xdd90974e
+DD 0xd875c27e,0x051309c5,0x675423f9,0xba32e842
+DD 0xa3da7781,0x7ebcbc3a,0x1cfb9606,0xc19d5dbd
+DD 0x2f2aa980,0xf24c623b,0x900b4807,0x4d6d83bc
+DD 0x54851c7f,0x89e3d7c4,0xeba4fdf8,0x36c23643
+
+%macro slversion 4
+global %1_slver_%2%3%4
+global %1_slver
+%1_slver:
+%1_slver_%2%3%4:
+ dw 0x%4
+ db 0x%3, 0x%2
+%endmacro
+;;; func core, ver, snum
+slversion crc32_iscsi_00, 00, 02, 0014
diff --git a/src/common/sctp_crc32.c b/src/common/sctp_crc32.c
index b11bb48dd87..7e2678a2b7c 100644
--- a/src/common/sctp_crc32.c
+++ b/src/common/sctp_crc32.c
@@ -728,7 +728,7 @@ sctp_csum_finalize(uint32_t crc32c)
}
#endif
-uint32_t ceph_crc32c_le_generic(uint32_t crc, unsigned char const *data, unsigned length)
+uint32_t ceph_crc32c_sctp(uint32_t crc, unsigned char const *data, unsigned length)
{
return update_crc32(crc, data, length);
}
diff --git a/src/common/sctp_crc32.h b/src/common/sctp_crc32.h
new file mode 100644
index 00000000000..92d20bcb7cc
--- /dev/null
+++ b/src/common/sctp_crc32.h
@@ -0,0 +1,14 @@
+#ifndef CEPH_COMMON_SCTP_CRC32_H
+#define CEPH_COMMON_SCTP_CRC32_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern uint32_t ceph_crc32c_sctp(uint32_t crc, unsigned char const *data, unsigned length);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/common/sharedptr_registry.hpp b/src/common/sharedptr_registry.hpp
index a62aa0d9ce3..6579bd4ba71 100644
--- a/src/common/sharedptr_registry.hpp
+++ b/src/common/sharedptr_registry.hpp
@@ -58,6 +58,26 @@ public:
lock("SharedPtrRegistry::lock")
{}
+ bool empty() {
+ Mutex::Locker l(lock);
+ return contents.empty();
+ }
+
+ bool get_next(const K &key, pair<K, VPtr> *next) {
+ VPtr next_val;
+ Mutex::Locker l(lock);
+ typename map<K, WeakVPtr>::iterator i = contents.upper_bound(key);
+ while (i != contents.end() &&
+ !(next_val = i->second.lock()))
+ ++i;
+ if (i == contents.end())
+ return false;
+ if (next)
+ *next = make_pair(i->first, next_val);
+ return true;
+ }
+
+
bool get_next(const K &key, pair<K, V> *next) {
VPtr next_val;
Mutex::Locker l(lock);
diff --git a/src/include/buffer.h b/src/include/buffer.h
index 8c4dfb56e17..8e637d658c5 100644
--- a/src/include/buffer.h
+++ b/src/include/buffer.h
@@ -425,7 +425,7 @@ public:
it != _buffers.end();
++it)
if (it->length())
- crc = ceph_crc32c_le(crc, (unsigned char*)it->c_str(), it->length());
+ crc = ceph_crc32c(crc, (unsigned char*)it->c_str(), it->length());
return crc;
}
diff --git a/src/include/crc32c.h b/src/include/crc32c.h
index 3fd209efb02..d5f7388be56 100644
--- a/src/include/crc32c.h
+++ b/src/include/crc32c.h
@@ -1,25 +1,25 @@
#ifndef CEPH_CRC32C_H
#define CEPH_CRC32C_H
-#ifdef __cplusplus
-extern "C" {
-#endif
-
+#include "include/inttypes.h"
#include <string.h>
-extern int ceph_have_crc32c_intel(void);
-extern uint32_t ceph_crc32c_le_generic(uint32_t crc, unsigned char const *data, unsigned length);
-extern uint32_t ceph_crc32c_le_intel(uint32_t crc, unsigned char const *data, unsigned length);
+typedef uint32_t (*ceph_crc32c_func_t)(uint32_t crc, unsigned char const *data, unsigned length);
-static inline uint32_t ceph_crc32c_le(uint32_t crc, unsigned char const *data, unsigned length) {
- if (ceph_have_crc32c_intel()) //__builtin_cpu_supports("sse4.2"))
- return ceph_crc32c_le_intel(crc, data, length);
- else
- return ceph_crc32c_le_generic(crc, data, length);
-}
+/*
+ * this is a static global with the chosen crc32c implementation for
+ * the given architecture.
+ */
+extern ceph_crc32c_func_t ceph_crc32c_func;
-#ifdef __cplusplus
+extern ceph_crc32c_func_t ceph_choose_crc32(void);
+
+/*
+ * common entry point; use this!
+ */
+static inline uint32_t ceph_crc32c(uint32_t crc, unsigned char const *data, unsigned length)
+{
+ return ceph_crc32c_func(crc, data, length);
}
-#endif
#endif
diff --git a/src/init-ceph.in b/src/init-ceph.in
index 7d003e6370c..3a404a46c6f 100644
--- a/src/init-ceph.in
+++ b/src/init-ceph.in
@@ -34,6 +34,10 @@ usage_exit() {
exit
}
+# behave if we are not completely installed (e.g., Debian "removed,
+# config remains" state)
+test -f $LIBDIR/ceph_common.sh || exit 0
+
. $LIBDIR/ceph_common.sh
EXIT_STATUS=0
diff --git a/src/init-rbdmap b/src/init-rbdmap
index 54554ae40d7..e04424fcd78 100755
--- a/src/init-rbdmap
+++ b/src/init-rbdmap
@@ -68,7 +68,7 @@ do_unmap() {
umount $MNT
done
# Unmap all rbd device
- if [ -b /dev/rbd[0-9]* ]; then
+ if ls /dev/rbd[0-9]* >/dev/null 2>&1; then
for DEV in /dev/rbd[0-9]*; do
log_progress_msg $DEV
rbd unmap $DEV
diff --git a/src/librados-config.cc b/src/librados-config.cc
index a0a064fd1d7..ffe758129b8 100644
--- a/src/librados-config.cc
+++ b/src/librados-config.cc
@@ -42,7 +42,8 @@ int main(int argc, const char **argv)
bool opt_version = false;
bool opt_vernum = false;
- global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0);
+ global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY,
+ CINIT_FLAG_NO_DEFAULT_CONFIG_FILE);
common_init_finish(g_ceph_context);
for (std::vector<const char*>::iterator i = args.begin();
diff --git a/src/librados/AioCompletionImpl.h b/src/librados/AioCompletionImpl.h
index cf049e0a9a2..a40282a7c25 100644
--- a/src/librados/AioCompletionImpl.h
+++ b/src/librados/AioCompletionImpl.h
@@ -35,7 +35,7 @@ struct librados::AioCompletionImpl {
eversion_t objver;
rados_callback_t callback_complete, callback_safe;
- void *callback_arg;
+ void *callback_complete_arg, *callback_safe_arg;
// for read
bool is_read;
@@ -49,21 +49,24 @@ struct librados::AioCompletionImpl {
AioCompletionImpl() : lock("AioCompletionImpl lock", false, false),
ref(1), rval(0), released(false), ack(false), safe(false),
- callback_complete(0), callback_safe(0), callback_arg(0),
+ callback_complete(0),
+ callback_safe(0),
+ callback_complete_arg(0),
+ callback_safe_arg(0),
is_read(false), pbl(0), buf(0), maxlen(0),
io(NULL), aio_write_seq(0), aio_write_list_item(this) { }
int set_complete_callback(void *cb_arg, rados_callback_t cb) {
lock.Lock();
callback_complete = cb;
- callback_arg = cb_arg;
+ callback_complete_arg = cb_arg;
lock.Unlock();
return 0;
}
int set_safe_callback(void *cb_arg, rados_callback_t cb) {
lock.Lock();
callback_safe = cb;
- callback_arg = cb_arg;
+ callback_safe_arg = cb_arg;
lock.Unlock();
return 0;
}
@@ -171,7 +174,7 @@ struct C_AioComplete : public Context {
void finish(int r) {
rados_callback_t cb = c->callback_complete;
- void *cb_arg = c->callback_arg;
+ void *cb_arg = c->callback_complete_arg;
cb(c, cb_arg);
c->lock.Lock();
@@ -190,7 +193,7 @@ struct C_AioSafe : public Context {
void finish(int r) {
rados_callback_t cb = c->callback_safe;
- void *cb_arg = c->callback_arg;
+ void *cb_arg = c->callback_safe_arg;
cb(c, cb_arg);
c->lock.Lock();
@@ -222,13 +225,14 @@ struct C_AioCompleteAndSafe : public Context {
c->safe = true;
c->lock.Unlock();
rados_callback_t cb_complete = c->callback_complete;
- void *cb_arg = c->callback_arg;
+ void *cb_complete_arg = c->callback_complete_arg;
if (cb_complete)
- cb_complete(c, cb_arg);
+ cb_complete(c, cb_complete_arg);
rados_callback_t cb_safe = c->callback_safe;
+ void *cb_safe_arg = c->callback_safe_arg;
if (cb_safe)
- cb_safe(c, cb_arg);
+ cb_safe(c, cb_safe_arg);
c->lock.Lock();
c->callback_complete = NULL;
diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc
index 898dcd39f48..86b380f2827 100644
--- a/src/mds/MDCache.cc
+++ b/src/mds/MDCache.cc
@@ -7935,7 +7935,7 @@ void MDCache::_open_ino_backtrace_fetched(inodeno_t ino, bufferlist& bl, int err
inode_backtrace_t backtrace;
if (err == 0) {
::decode(backtrace, bl);
- if (backtrace.pool != info.pool) {
+ if (backtrace.pool != info.pool && backtrace.pool != -1) {
dout(10) << " old object in pool " << info.pool
<< ", retrying pool " << backtrace.pool << dendl;
info.pool = backtrace.pool;
diff --git a/src/mds/flock.cc b/src/mds/flock.cc
index e83c5ee23a0..5e329afafb7 100644
--- a/src/mds/flock.cc
+++ b/src/mds/flock.cc
@@ -75,12 +75,14 @@ bool ceph_lock_state_t::add_lock(ceph_filelock& new_lock,
} else {
//yay, we can insert a shared lock
dout(15) << "inserting shared lock" << dendl;
+ remove_waiting(new_lock);
adjust_locks(self_overlapping_locks, new_lock, neighbor_locks);
held_locks.insert(pair<uint64_t, ceph_filelock>(new_lock.start, new_lock));
ret = true;
}
}
} else { //no overlapping locks except our own
+ remove_waiting(new_lock);
adjust_locks(self_overlapping_locks, new_lock, neighbor_locks);
dout(15) << "no conflicts, inserting " << new_lock << dendl;
held_locks.insert(pair<uint64_t, ceph_filelock>
@@ -89,7 +91,6 @@ bool ceph_lock_state_t::add_lock(ceph_filelock& new_lock,
}
if (ret) {
++client_held_lock_counts[(client_t)new_lock.client];
- remove_waiting(new_lock);
}
else if (wait_on_fail && !replay)
++client_waiting_lock_counts[(client_t)new_lock.client];
@@ -306,7 +307,7 @@ void ceph_lock_state_t::adjust_locks(list<multimap<uint64_t, ceph_filelock>::ite
old_lock = &(*iter)->second;
old_lock_client = old_lock->client;
dout(15) << "lock to coalesce: " << *old_lock << dendl;
- /* because if it's a neibhoring lock there can't be any self-overlapping
+ /* because if it's a neighboring lock there can't be any self-overlapping
locks that covered it */
if (old_lock->type == new_lock.type) { //merge them
if (0 == new_lock.length) {
diff --git a/src/mon/MonCap.cc b/src/mon/MonCap.cc
index d8bccce9bc2..644d614bdf9 100644
--- a/src/mon/MonCap.cc
+++ b/src/mon/MonCap.cc
@@ -346,7 +346,7 @@ struct MonCapParser : qi::grammar<Iterator, MonCap()>
quoted_string %=
lexeme['"' >> +(char_ - '"') >> '"'] |
lexeme['\'' >> +(char_ - '\'') >> '\''];
- unquoted_word %= +char_("a-zA-Z0-9_-");
+ unquoted_word %= +char_("a-zA-Z0-9_.-");
str %= quoted_string | unquoted_word;
spaces = +lit(' ');
diff --git a/src/mon/MonCommands.h b/src/mon/MonCommands.h
index 8d85e03ed99..ec1ee71c9e1 100644
--- a/src/mon/MonCommands.h
+++ b/src/mon/MonCommands.h
@@ -111,7 +111,7 @@ COMMAND("pg getmap", "get binary pg map to -o/stdout", "pg", "r", "cli,rest")
COMMAND("pg send_pg_creates", "trigger pg creates to be issued",\
"pg", "rw", "cli,rest")
COMMAND("pg dump " \
- "name=dumpcontents,type=CephChoices,strings=all|summary|sum|pools|osds|pgs|pgs_brief,n=N,req=false", \
+ "name=dumpcontents,type=CephChoices,strings=all|summary|sum|delta|pools|osds|pgs|pgs_brief,n=N,req=false", \
"show human-readable versions of pg map", "pg", "r", "cli,rest")
COMMAND("pg dump_json " \
"name=dumpcontents,type=CephChoices,strings=all|summary|sum|pools|osds|pgs,n=N,req=false", \
diff --git a/src/mon/Monitor.cc b/src/mon/Monitor.cc
index 61e2a2aa57a..45ca02027fc 100644
--- a/src/mon/Monitor.cc
+++ b/src/mon/Monitor.cc
@@ -1878,7 +1878,6 @@ bool Monitor::_allowed_command(MonSession *s, string &module, string &prefix,
MonCommand *this_cmd = NULL;
for (MonCommand *cp = mon_commands;
cp < &mon_commands[ARRAY_SIZE(mon_commands)]; cp++) {
- dout(0) << __func__ << " CAPSBAR >> matching against " << cp->cmdstring << dendl;
if (cp->cmdstring.find(prefix) != string::npos) {
this_cmd = cp;
break;
diff --git a/src/mon/PGMap.cc b/src/mon/PGMap.cc
index 1c5ac710d7a..e9a35c6b8ab 100644
--- a/src/mon/PGMap.cc
+++ b/src/mon/PGMap.cc
@@ -513,13 +513,18 @@ void PGMap::dump_basic(Formatter *f) const
pg_sum.dump(f);
f->close_section();
- f->open_object_section("pg_stats_delta");
- pg_sum_delta.dump(f);
- f->close_section();
-
f->open_object_section("osd_stats_sum");
osd_sum.dump(f);
f->close_section();
+
+ dump_delta(f);
+}
+
+void PGMap::dump_delta(Formatter *f) const
+{
+ f->open_object_section("pg_stats_delta");
+ pg_sum_delta.dump(f);
+ f->close_section();
}
void PGMap::dump_pg_stats(Formatter *f, bool brief) const
diff --git a/src/mon/PGMap.h b/src/mon/PGMap.h
index 00aa01ed07b..84d89f87517 100644
--- a/src/mon/PGMap.h
+++ b/src/mon/PGMap.h
@@ -158,6 +158,7 @@ public:
void dump_pg_stats(Formatter *f, bool brief) const;
void dump_pool_stats(Formatter *f) const;
void dump_osd_stats(Formatter *f) const;
+ void dump_delta(Formatter *f) const;
void dump_pg_stats_plain(ostream& ss,
const hash_map<pg_t, pg_stat_t>& pg_stats) const;
diff --git a/src/mon/PGMonitor.cc b/src/mon/PGMonitor.cc
index bb5f447a4e3..ff2d1fe4947 100644
--- a/src/mon/PGMonitor.cc
+++ b/src/mon/PGMonitor.cc
@@ -1406,6 +1406,11 @@ bool PGMonitor::preprocess_command(MMonCommand *m)
if (what.count("pgs_brief")) {
pg_map.dump_pg_stats(f.get(), true);
}
+ if (what.count("delta")) {
+ f->open_object_section("delta");
+ pg_map.dump_delta(f.get());
+ f->close_section();
+ }
}
f->flush(ds);
} else {
diff --git a/src/mon/Paxos.cc b/src/mon/Paxos.cc
index 347810775c0..016686aae06 100644
--- a/src/mon/Paxos.cc
+++ b/src/mon/Paxos.cc
@@ -495,7 +495,7 @@ void Paxos::handle_last(MMonPaxos *last)
}
if (need_refresh)
- do_refresh();
+ (void)do_refresh();
last->put();
}
diff --git a/src/msg/Message.h b/src/msg/Message.h
index 3ed8ee667d2..f345e7adaab 100644
--- a/src/msg/Message.h
+++ b/src/msg/Message.h
@@ -449,8 +449,8 @@ public:
const utime_t& get_recv_complete_stamp() const { return recv_complete_stamp; }
void calc_header_crc() {
- header.crc = ceph_crc32c_le(0, (unsigned char*)&header,
- sizeof(header) - sizeof(header.crc));
+ header.crc = ceph_crc32c(0, (unsigned char*)&header,
+ sizeof(header) - sizeof(header.crc));
}
void calc_front_crc() {
footer.front_crc = payload.crc32c(0);
diff --git a/src/msg/Pipe.cc b/src/msg/Pipe.cc
index 6f271c812f3..50656fee53b 100644
--- a/src/msg/Pipe.cc
+++ b/src/msg/Pipe.cc
@@ -1684,7 +1684,7 @@ int Pipe::read_message(Message **pm)
if (connection_state->has_feature(CEPH_FEATURE_NOSRCADDR)) {
if (tcp_read((char*)&header, sizeof(header)) < 0)
return -1;
- header_crc = ceph_crc32c_le(0, (unsigned char *)&header, sizeof(header) - sizeof(header.crc));
+ header_crc = ceph_crc32c(0, (unsigned char *)&header, sizeof(header) - sizeof(header.crc));
} else {
ceph_msg_header_old oldheader;
if (tcp_read((char*)&oldheader, sizeof(oldheader)) < 0)
@@ -1694,7 +1694,7 @@ int Pipe::read_message(Message **pm)
header.src = oldheader.src.name;
header.reserved = oldheader.reserved;
header.crc = oldheader.crc;
- header_crc = ceph_crc32c_le(0, (unsigned char *)&oldheader, sizeof(oldheader) - sizeof(oldheader.crc));
+ header_crc = ceph_crc32c(0, (unsigned char *)&oldheader, sizeof(oldheader) - sizeof(oldheader.crc));
}
ldout(msgr->cct,20) << "reader got envelope type=" << header.type
@@ -2028,8 +2028,8 @@ int Pipe::write_message(ceph_msg_header& header, ceph_msg_footer& footer, buffer
oldheader.src.addr = connection_state->get_peer_addr();
oldheader.orig_src = oldheader.src;
oldheader.reserved = header.reserved;
- oldheader.crc = ceph_crc32c_le(0, (unsigned char*)&oldheader,
- sizeof(oldheader) - sizeof(oldheader.crc));
+ oldheader.crc = ceph_crc32c(0, (unsigned char*)&oldheader,
+ sizeof(oldheader) - sizeof(oldheader.crc));
msgvec[msg.msg_iovlen].iov_base = (char*)&oldheader;
msgvec[msg.msg_iovlen].iov_len = sizeof(oldheader);
msglen += sizeof(oldheader);
diff --git a/src/os/BtrfsFileStoreBackend.cc b/src/os/BtrfsFileStoreBackend.cc
index 580a4b5bebf..9fa96babab7 100644
--- a/src/os/BtrfsFileStoreBackend.cc
+++ b/src/os/BtrfsFileStoreBackend.cc
@@ -51,7 +51,8 @@
#define ALIGN_UP(x, by) (ALIGNED((x), (by)) ? (x) : (ALIGN_DOWN((x), (by)) + (by)))
BtrfsFileStoreBackend::BtrfsFileStoreBackend(FileStore *fs):
- GenericFileStoreBackend(fs), has_clone_range(false), has_snap_create(false),
+ GenericFileStoreBackend(fs), has_clone_range(false),
+ has_snap_create(false), has_snap_destroy(false),
has_snap_create_v2(false), has_wait_sync(false), stable_commits(false),
m_filestore_btrfs_clone_range(g_conf->filestore_btrfs_clone_range),
m_filestore_btrfs_snap (g_conf->filestore_btrfs_snap) { }
@@ -298,8 +299,10 @@ int BtrfsFileStoreBackend::create_current()
int BtrfsFileStoreBackend::list_checkpoints(list<string>& ls)
{
+ int ret, err = 0;
+
struct stat basest;
- int ret = ::fstat(get_basedir_fd(), &basest);
+ ret = ::fstat(get_basedir_fd(), &basest);
if (ret < 0) {
ret = -errno;
dout(0) << "list_checkpoints: cannot fstat basedir " << cpp_strerror(ret) << dendl;
@@ -327,10 +330,10 @@ int BtrfsFileStoreBackend::list_checkpoints(list<string>& ls)
struct stat st;
ret = ::stat(path, &st);
if (ret < 0) {
- ret = -errno;
+ err = -errno;
dout(0) << "list_checkpoints: stat '" << path << "' failed: "
- << cpp_strerror(ret) << dendl;
- return ret;
+ << cpp_strerror(err) << dendl;
+ break;
}
if (!S_ISDIR(st.st_mode))
@@ -339,10 +342,10 @@ int BtrfsFileStoreBackend::list_checkpoints(list<string>& ls)
struct statfs fs;
ret = ::statfs(path, &fs);
if (ret < 0) {
- ret = -errno;
+ err = -errno;
dout(0) << "list_checkpoints: statfs '" << path << "' failed: "
- << cpp_strerror(ret) << dendl;
- return ret;
+ << cpp_strerror(err) << dendl;
+ break;
}
if (fs.f_type == BTRFS_SUPER_MAGIC && basest.st_dev != st.st_dev)
@@ -352,9 +355,13 @@ int BtrfsFileStoreBackend::list_checkpoints(list<string>& ls)
if (::closedir(dir) < 0) {
ret = -errno;
dout(0) << "list_checkpoints: closedir failed: " << cpp_strerror(ret) << dendl;
- return ret;
+ if (!err)
+ err = ret;
}
+ if (err)
+ return err;
+
ls.swap(snaps);
return 0;
}
@@ -367,7 +374,7 @@ int BtrfsFileStoreBackend::create_checkpoint(const string& name, uint64_t *trans
memset(&async_args, 0, sizeof(async_args));
async_args.fd = get_current_fd();
async_args.flags = BTRFS_SUBVOL_CREATE_ASYNC;
- strcpy(async_args.name, name.c_str());
+ strncpy(async_args.name, name.c_str(), sizeof(async_args.name));
int r = ::ioctl(get_basedir_fd(), BTRFS_IOC_SNAP_CREATE_V2, &async_args);
if (r < 0) {
@@ -455,7 +462,7 @@ int BtrfsFileStoreBackend::destroy_checkpoint(const string& name)
btrfs_ioctl_vol_args vol_args;
memset(&vol_args, 0, sizeof(vol_args));
vol_args.fd = 0;
- strcpy(vol_args.name, name.c_str());
+ strncpy(vol_args.name, name.c_str(), sizeof(vol_args.name));
int ret = ::ioctl(get_basedir_fd(), BTRFS_IOC_SNAP_DESTROY, &vol_args);
if (ret) {
diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc
index 55f11707189..3e19f5634c1 100644
--- a/src/osd/OSD.cc
+++ b/src/osd/OSD.cc
@@ -1522,7 +1522,6 @@ int OSD::shutdown()
dout(20) << " kicking pg " << p->first << dendl;
p->second->lock();
p->second->on_shutdown();
- p->second->kick();
p->second->unlock();
p->second->osr->flush();
}
@@ -3513,7 +3512,7 @@ bool OSD::_is_healthy()
++up;
++num;
}
- if (up < num / 3) {
+ if ((float)up < (float)num * g_conf->osd_heartbeat_min_healthy_ratio) {
dout(1) << "is_healthy false -- only " << up << "/" << num << " up peers (less than 1/3)" << dendl;
return false;
}
diff --git a/src/osd/OSDCap.cc b/src/osd/OSDCap.cc
index ee77f0ea43d..e315835f4ba 100644
--- a/src/osd/OSDCap.cc
+++ b/src/osd/OSDCap.cc
@@ -178,7 +178,7 @@ struct OSDCapParser : qi::grammar<Iterator, OSDCap()>
equoted_string %=
lexeme['"' >> *(char_ - '"') >> '"'] |
lexeme['\'' >> *(char_ - '\'') >> '\''];
- unquoted_word %= +char_("a-zA-Z0-9_-");
+ unquoted_word %= +char_("a-zA-Z0-9_.-");
str %= quoted_string | unquoted_word;
estr %= equoted_string | unquoted_word;
diff --git a/src/osd/PG.cc b/src/osd/PG.cc
index 49ea61a603a..cd5621cddf2 100644
--- a/src/osd/PG.cc
+++ b/src/osd/PG.cc
@@ -4527,9 +4527,6 @@ void PG::start_peering_interval(const OSDMapRef lastmap,
{
const OSDMapRef osdmap = get_osdmap();
- // -- there was a change! --
- kick();
-
set_last_peering_reset();
vector<int> oldacting, oldup;
diff --git a/src/osd/PG.h b/src/osd/PG.h
index 14ac7c9fac5..720ce67bca3 100644
--- a/src/osd/PG.h
+++ b/src/osd/PG.h
@@ -232,7 +232,6 @@ protected:
* put_unlock() when done with the current pointer (_most common_).
*/
Mutex _lock;
- Cond _cond;
atomic_t ref;
#ifdef PG_DEBUG_REFS
@@ -261,14 +260,6 @@ public:
bool is_locked() const {
return _lock.is_locked();
}
- void wait() {
- assert(_lock.is_locked());
- _cond.Wait(_lock);
- }
- void kick() {
- assert(_lock.is_locked());
- _cond.Signal();
- }
#ifdef PG_DEBUG_REFS
uint64_t get_with_id();
diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc
index 6c7ec56af88..bca49024c0b 100644
--- a/src/osd/ReplicatedPG.cc
+++ b/src/osd/ReplicatedPG.cc
@@ -4,6 +4,9 @@
* Ceph - scalable distributed file system
*
* Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
+ * Copyright (C) 2013 Cloudwatt <libre.licensing@cloudwatt.com>
+ *
+ * Author: Loic Dachary <loic@dachary.org>
*
* This is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
@@ -613,7 +616,9 @@ void ReplicatedPG::calc_trim_to()
ReplicatedPG::ReplicatedPG(OSDService *o, OSDMapRef curmap,
const PGPool &_pool, pg_t p, const hobject_t& oid,
const hobject_t& ioid) :
- PG(o, curmap, _pool, p, oid, ioid), temp_created(false),
+ PG(o, curmap, _pool, p, oid, ioid),
+ snapset_contexts_lock("ReplicatedPG::snapset_contexts"),
+ temp_created(false),
temp_coll(coll_t::make_temp_coll(p)), snap_trimmer_machine(this)
{
snap_trimmer_machine.initiate();
@@ -693,7 +698,7 @@ void ReplicatedPG::do_op(OpRequestRef op)
return;
}
- ObjectContext *obc;
+ ObjectContextRef obc;
bool can_create = op->may_write();
snapid_t snapid;
int r = find_object_context(
@@ -747,7 +752,6 @@ void ReplicatedPG::do_op(OpRequestRef op)
if (!op->may_write() && !obc->obs.exists) {
osd->reply_op_error(op, -ENOENT);
- put_object_context(obc);
return;
}
@@ -756,7 +760,6 @@ void ReplicatedPG::do_op(OpRequestRef op)
dout(10) << "do_op writes for " << obc->obs.oi.soid << " blocked by "
<< obc->blocked_by->obs.oi.soid << dendl;
wait_for_degraded_object(obc->blocked_by->obs.oi.soid, op);
- put_object_context(obc);
return;
}
@@ -773,7 +776,7 @@ void ReplicatedPG::do_op(OpRequestRef op)
}
// src_oids
- map<hobject_t,ObjectContext*> src_obc;
+ map<hobject_t,ObjectContextRef> src_obc;
for (vector<OSDOp>::iterator p = m->ops.begin(); p != m->ops.end(); ++p) {
OSDOp& osd_op = *p;
@@ -781,7 +784,6 @@ void ReplicatedPG::do_op(OpRequestRef op)
if (osd_op.op.op == CEPH_OSD_OP_LIST_SNAPS &&
m->get_snapid() != CEPH_SNAPDIR) {
dout(10) << "LIST_SNAPS with incorrect context" << dendl;
- put_object_context(obc);
osd->reply_op_error(op, -EINVAL);
return;
}
@@ -794,7 +796,7 @@ void ReplicatedPG::do_op(OpRequestRef op)
hobject_t src_oid(osd_op.soid, src_oloc.key, m->get_pg().ps(),
info.pgid.pool(), m->get_object_locator().nspace);
if (!src_obc.count(src_oid)) {
- ObjectContext *sobc;
+ ObjectContextRef sobc;
snapid_t ssnapid;
int r = find_object_context(src_oid, &sobc, false, &ssnapid);
@@ -816,10 +818,8 @@ void ReplicatedPG::do_op(OpRequestRef op)
(before_backfill && sobc->obs.oi.soid > backfill_target_info->last_backfill)) {
wait_for_degraded_object(sobc->obs.oi.soid, op);
dout(10) << " writes for " << obc->obs.oi.soid << " now blocked by "
- << sobc->obs.oi.soid << dendl;
- obc->get();
+ << sobc->obs.oi.soid << dendl;
obc->blocked_by = sobc;
- sobc->get();
sobc->blocking.insert(obc);
} else {
dout(10) << " src_oid " << src_oid << " obc " << src_obc << dendl;
@@ -836,8 +836,7 @@ void ReplicatedPG::do_op(OpRequestRef op)
dout(10) << "no src oid specified for multi op " << osd_op << dendl;
osd->reply_op_error(op, -EINVAL);
}
- put_object_contexts(src_obc);
- put_object_context(obc);
+ src_obc.clear();
return;
}
@@ -852,7 +851,7 @@ void ReplicatedPG::do_op(OpRequestRef op)
hobject_t clone_oid = obc->obs.oi.soid;
clone_oid.snap = *p;
if (!src_obc.count(clone_oid)) {
- ObjectContext *sobc;
+ ObjectContextRef sobc;
snapid_t ssnapid;
int r = find_object_context(clone_oid, &sobc, false, &ssnapid);
@@ -868,8 +867,7 @@ void ReplicatedPG::do_op(OpRequestRef op)
src_obc[clone_oid] = sobc;
continue;
}
- put_object_contexts(src_obc);
- put_object_context(obc);
+ src_obc.clear();
return;
} else {
continue;
@@ -902,8 +900,7 @@ void ReplicatedPG::do_op(OpRequestRef op)
<< " < snapset seq " << obc->ssc->snapset.seq
<< " on " << soid << dendl;
delete ctx;
- put_object_context(obc);
- put_object_contexts(src_obc);
+ src_obc.clear();
osd->reply_op_error(op, -EOLDSNAPC);
return;
}
@@ -912,8 +909,7 @@ void ReplicatedPG::do_op(OpRequestRef op)
if (oldv != eversion_t()) {
dout(3) << "do_op dup " << ctx->reqid << " was " << oldv << dendl;
delete ctx;
- put_object_context(obc);
- put_object_contexts(src_obc);
+ src_obc.clear();
if (already_complete(oldv)) {
osd->reply_op_error(op, 0, oldv);
} else {
@@ -970,7 +966,7 @@ void ReplicatedPG::do_op(OpRequestRef op)
dout(10) << " taking ondisk_read_lock" << dendl;
obc->ondisk_read_lock();
}
- for (map<hobject_t,ObjectContext*>::iterator p = src_obc.begin(); p != src_obc.end(); ++p) {
+ for (map<hobject_t,ObjectContextRef>::iterator p = src_obc.begin(); p != src_obc.end(); ++p) {
dout(10) << " taking ondisk_read_lock for src " << p->first << dendl;
p->second->ondisk_read_lock();
}
@@ -981,7 +977,7 @@ void ReplicatedPG::do_op(OpRequestRef op)
dout(10) << " dropping ondisk_read_lock" << dendl;
obc->ondisk_read_unlock();
}
- for (map<hobject_t,ObjectContext*>::iterator p = src_obc.begin(); p != src_obc.end(); ++p) {
+ for (map<hobject_t,ObjectContextRef>::iterator p = src_obc.begin(); p != src_obc.end(); ++p) {
dout(10) << " dropping ondisk_read_lock for src " << p->first << dendl;
p->second->ondisk_read_unlock();
}
@@ -989,8 +985,7 @@ void ReplicatedPG::do_op(OpRequestRef op)
if (result == -EAGAIN) {
// clean up after the ctx
delete ctx;
- put_object_context(obc);
- put_object_contexts(src_obc);
+ src_obc.clear();
return;
}
@@ -998,8 +993,7 @@ void ReplicatedPG::do_op(OpRequestRef op)
if (ctx->delta_stats.num_bytes > 0 &&
pool.info.get_flags() & pg_pool_t::FLAG_FULL) {
delete ctx;
- put_object_context(obc);
- put_object_contexts(src_obc);
+ src_obc.clear();
osd->reply_op_error(op, -ENOSPC);
return;
}
@@ -1044,8 +1038,7 @@ void ReplicatedPG::do_op(OpRequestRef op)
reply->add_flags(CEPH_OSD_FLAG_ACK | CEPH_OSD_FLAG_ONDISK);
osd->send_message_osd_client(reply, m->get_connection());
delete ctx;
- put_object_context(obc);
- put_object_contexts(src_obc);
+ src_obc.clear();
return;
}
@@ -1056,9 +1049,6 @@ void ReplicatedPG::do_op(OpRequestRef op)
append_log(ctx->log, pg_trim_to, ctx->local_t);
- // continuing on to write path, make sure object context is registered
- assert(obc->registered);
-
// verify that we are doing this in order?
if (g_conf->osd_debug_op_order && m->get_source().is_client()) {
map<client_t,tid_t>& cm = debug_op_order[obc->obs.oi.soid];
@@ -1466,14 +1456,13 @@ ReplicatedPG::RepGather *ReplicatedPG::trim_object(const hobject_t &coid)
{
// load clone info
bufferlist bl;
- ObjectContext *obc = 0;
+ ObjectContextRef obc;
int r = find_object_context(coid, &obc, false, NULL);
if (r == -ENOENT || coid.snap != obc->obs.oi.soid.snap) {
derr << __func__ << "could not find coid " << coid << dendl;
assert(0);
}
assert(r == 0);
- assert(obc->registered);
object_info_t &coi = obc->obs.oi;
set<snapid_t> old_snaps(coi.snaps.begin(), coi.snaps.end());
@@ -1615,7 +1604,6 @@ ReplicatedPG::RepGather *ReplicatedPG::trim_object(const hobject_t &coid)
snapset.head_exists ? CEPH_NOSNAP:CEPH_SNAPDIR, coid.hash,
info.pgid.pool(), coid.get_namespace());
ctx->snapset_obc = get_object_context(snapoid, false);
- assert(ctx->snapset_obc->registered);
if (snapset.clones.empty() && !snapset.head_exists) {
dout(10) << coid << " removing " << snapoid << dendl;
@@ -2066,7 +2054,7 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
ctx->user_modify = true;
}
- ObjectContext *src_obc = 0;
+ ObjectContextRef src_obc;
if (ceph_osd_op_type_multi(op.op)) {
MOSDOp *m = static_cast<MOSDOp *>(ctx->op->request);
object_locator_t src_oloc;
@@ -2484,7 +2472,7 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
hobject_t clone_oid = soid;
clone_oid.snap = *clone_iter;
- ObjectContext *clone_obc = ctx->src_obc[clone_oid];
+ ObjectContextRef clone_obc = ctx->src_obc[clone_oid];
assert(clone_obc);
for (vector<snapid_t>::reverse_iterator p = clone_obc->obs.oi.snaps.rbegin();
p != clone_obc->obs.oi.snaps.rend();
@@ -2606,6 +2594,10 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
case CEPH_OSD_OP_WRITE:
++ctx->num_write;
{ // write
+ if (op.extent.length != osd_op.indata.length()) {
+ result = -EINVAL;
+ break;
+ }
__u32 seq = oi.truncate_seq;
if (seq && (seq > op.extent.truncate_seq) &&
(op.extent.offset + op.extent.length > oi.size)) {
@@ -2637,9 +2629,7 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
result = check_offset_and_length(op.extent.offset, op.extent.length);
if (result < 0)
break;
- bufferlist nbl;
- bp.copy(op.extent.length, nbl);
- t.write(coll, soid, op.extent.offset, op.extent.length, nbl);
+ t.write(coll, soid, op.extent.offset, op.extent.length, osd_op.indata);
write_update_size_and_usage(ctx->delta_stats, oi, ssc->snapset, ctx->modified_ranges,
op.extent.offset, op.extent.length, true);
if (!obs.exists) {
@@ -2652,18 +2642,20 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
case CEPH_OSD_OP_WRITEFULL:
++ctx->num_write;
{ // write full object
+ if (op.extent.length != osd_op.indata.length()) {
+ result = -EINVAL;
+ break;
+ }
result = check_offset_and_length(op.extent.offset, op.extent.length);
if (result < 0)
break;
- bufferlist nbl;
- bp.copy(op.extent.length, nbl);
if (obs.exists) {
t.truncate(coll, soid, 0);
} else {
ctx->delta_stats.num_objects++;
obs.exists = true;
}
- t.write(coll, soid, op.extent.offset, op.extent.length, nbl);
+ t.write(coll, soid, op.extent.offset, op.extent.length, osd_op.indata);
interval_set<uint64_t> ch;
if (oi.size > 0)
ch.insert(0, oi.size);
@@ -2825,16 +2817,15 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
uint64_t cookie = op.watch.cookie;
bool do_watch = op.watch.flag & 1;
entity_name_t entity = ctx->reqid.name;
- ObjectContext *obc = ctx->obc;
+ ObjectContextRef obc = ctx->obc;
- dout(10) << "watch: ctx->obc=" << (void *)obc << " cookie=" << cookie
+ dout(10) << "watch: ctx->obc=" << (void *)obc.get() << " cookie=" << cookie
<< " oi.version=" << oi.version.version << " ctx->at_version=" << ctx->at_version << dendl;
dout(10) << "watch: oi.user_version=" << oi.user_version.version << dendl;
dout(10) << "watch: peer_addr="
<< ctx->op->request->get_connection()->get_peer_addr() << dendl;
- // FIXME: where does the timeout come from?
- watch_info_t w(cookie, 30,
+ watch_info_t w(cookie, g_conf->osd_client_watch_timeout,
ctx->op->request->get_connection()->get_peer_addr());
if (do_watch) {
if (oi.watchers.count(make_pair(cookie, entity))) {
@@ -2845,7 +2836,6 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
t.nop(); // make sure update the object_info on disk!
}
ctx->watch_connects.push_back(w);
- assert(obc->registered);
} else {
map<pair<uint64_t, entity_name_t>, watch_info_t>::iterator oi_iter =
oi.watchers.find(make_pair(cookie, entity));
@@ -3428,7 +3418,7 @@ int ReplicatedPG::_rollback_to(OpContext *ctx, ceph_osd_op& op)
dout(10) << "_rollback_to " << soid << " snapid " << snapid << dendl;
- ObjectContext *rollback_to;
+ ObjectContextRef rollback_to;
int ret = find_object_context(
hobject_t(soid.oid, soid.get_key(), snapid, soid.hash, info.pgid.pool(), soid.get_namespace()),
&rollback_to, false, &cloneid);
@@ -3511,7 +3501,6 @@ int ReplicatedPG::_rollback_to(OpContext *ctx, ceph_osd_op& op)
obs.oi.size = rollback_to->obs.oi.size;
snapset.head_exists = true;
}
- put_object_context(rollback_to);
}
return ret;
}
@@ -3567,9 +3556,10 @@ void ReplicatedPG::make_writeable(OpContext *ctx)
object_info_t static_snap_oi(coid);
object_info_t *snap_oi;
if (is_primary()) {
- ctx->clone_obc = new ObjectContext(static_snap_oi, true, NULL);
- ctx->clone_obc->get();
- register_object_context(ctx->clone_obc);
+ ctx->clone_obc = object_contexts.lookup_or_create(static_snap_oi.soid);
+ ctx->clone_obc->destructor_callback = new C_PG_ObjectContext(this, ctx->clone_obc.get());
+ ctx->clone_obc->obs.oi = static_snap_oi;
+ ctx->clone_obc->obs.exists = true;
snap_oi = &ctx->clone_obc->obs.oi;
} else {
snap_oi = &static_snap_oi;
@@ -3819,7 +3809,6 @@ int ReplicatedPG::prepare_transaction(OpContext *ctx)
ctx->at_version.version++;
ctx->snapset_obc->obs.exists = false;
- assert(ctx->snapset_obc->registered);
}
}
} else if (ctx->new_snapset.clones.size()) {
@@ -3836,7 +3825,6 @@ int ReplicatedPG::prepare_transaction(OpContext *ctx)
ctx->snapset_obc->obs.oi.version = ctx->at_version;
ctx->snapset_obc->obs.oi.last_reqid = ctx->reqid;
ctx->snapset_obc->obs.oi.mtime = ctx->mtime;
- assert(ctx->snapset_obc->registered);
bufferlist bv(sizeof(ctx->new_obs.oi));
::encode(ctx->snapset_obc->obs.oi, bv);
@@ -3978,17 +3966,14 @@ void ReplicatedPG::op_applied(RepGather *repop)
int whoami = osd->get_nodeid();
if (repop->ctx->clone_obc) {
- put_object_context(repop->ctx->clone_obc);
- repop->ctx->clone_obc = 0;
+ repop->ctx->clone_obc = ObjectContextRef();
}
if (repop->ctx->snapset_obc) {
- put_object_context(repop->ctx->snapset_obc);
- repop->ctx->snapset_obc = 0;
+ repop->ctx->snapset_obc = ObjectContextRef();
}
- put_object_context(repop->obc);
- put_object_contexts(repop->src_obc);
- repop->obc = 0;
+ repop->src_obc.clear();
+ repop->obc = ObjectContextRef();
if (!repop->aborted) {
assert(repop->waitfor_ack.count(whoami) ||
@@ -4264,7 +4249,7 @@ void ReplicatedPG::issue_repop(RepGather *repop, utime_t now,
}
}
-ReplicatedPG::RepGather *ReplicatedPG::new_repop(OpContext *ctx, ObjectContext *obc,
+ReplicatedPG::RepGather *ReplicatedPG::new_repop(OpContext *ctx, ObjectContextRef obc,
tid_t rep_tid)
{
if (ctx->op)
@@ -4349,16 +4334,14 @@ void ReplicatedPG::repop_ack(RepGather *repop, int result, int ack_type,
void ReplicatedPG::get_watchers(list<obj_watch_item_t> &pg_watchers)
{
- for (map<hobject_t, ObjectContext*>::iterator i = object_contexts.begin();
- i != object_contexts.end();
- ++i) {
- i->second->get();
- get_obc_watchers(i->second, pg_watchers);
- put_object_context(i->second);
+ pair<hobject_t, ObjectContextRef> i;
+ while (object_contexts.get_next(i.first, &i)) {
+ ObjectContextRef obc(i.second);
+ get_obc_watchers(obc, pg_watchers);
}
}
-void ReplicatedPG::get_obc_watchers(ObjectContext *obc, list<obj_watch_item_t> &pg_watchers)
+void ReplicatedPG::get_obc_watchers(ObjectContextRef obc, list<obj_watch_item_t> &pg_watchers)
{
for (map<pair<uint64_t, entity_name_t>, WatchRef>::iterator j =
obc->watchers.begin();
@@ -4382,16 +4365,12 @@ void ReplicatedPG::get_obc_watchers(ObjectContext *obc, list<obj_watch_item_t> &
void ReplicatedPG::check_blacklisted_watchers()
{
dout(20) << "ReplicatedPG::check_blacklisted_watchers for pg " << get_pgid() << dendl;
- for (map<hobject_t, ObjectContext*>::iterator i = object_contexts.begin();
- i != object_contexts.end();
- ++i) {
- i->second->get();
- check_blacklisted_obc_watchers(i->second);
- put_object_context(i->second);
- }
+ pair<hobject_t, ObjectContextRef> i;
+ while (object_contexts.get_next(i.first, &i))
+ check_blacklisted_obc_watchers(i.second);
}
-void ReplicatedPG::check_blacklisted_obc_watchers(ObjectContext *obc)
+void ReplicatedPG::check_blacklisted_obc_watchers(ObjectContextRef obc)
{
dout(20) << "ReplicatedPG::check_blacklisted_obc_watchers for obc " << obc->obs.oi.soid << dendl;
for (map<pair<uint64_t, entity_name_t>, WatchRef>::iterator k =
@@ -4411,7 +4390,7 @@ void ReplicatedPG::check_blacklisted_obc_watchers(ObjectContext *obc)
}
}
-void ReplicatedPG::populate_obc_watchers(ObjectContext *obc)
+void ReplicatedPG::populate_obc_watchers(ObjectContextRef obc)
{
assert(is_active());
assert(!is_missing_object(obc->obs.oi.soid) ||
@@ -4447,7 +4426,7 @@ void ReplicatedPG::populate_obc_watchers(ObjectContext *obc)
void ReplicatedPG::handle_watch_timeout(WatchRef watch)
{
- ObjectContext *obc = watch->get_obc(); // handle_watch_timeout owns this ref
+ ObjectContextRef obc = watch->get_obc(); // handle_watch_timeout owns this ref
dout(10) << "handle_watch_timeout obc " << obc << dendl;
if (is_degraded_object(obc->obs.oi.soid)) {
@@ -4457,7 +4436,6 @@ void ReplicatedPG::handle_watch_timeout(WatchRef watch)
dout(10) << "handle_watch_timeout waiting for degraded on obj "
<< obc->obs.oi.soid
<< dendl;
- put_object_context(obc); // callback got its own ref
return;
}
@@ -4468,7 +4446,6 @@ void ReplicatedPG::handle_watch_timeout(WatchRef watch)
scrubber.add_callback(
watch->get_delayed_cb() // This callback!
);
- put_object_context(obc);
return;
}
@@ -4516,41 +4493,35 @@ void ReplicatedPG::handle_watch_timeout(WatchRef watch)
eval_repop(repop);
}
-ObjectContext *ReplicatedPG::_lookup_object_context(const hobject_t& oid)
-{
- map<hobject_t, ObjectContext*>::iterator p = object_contexts.find(oid);
- if (p != object_contexts.end())
- return p->second;
- return NULL;
-}
-
-ObjectContext *ReplicatedPG::create_object_context(const object_info_t& oi,
- SnapSetContext *ssc)
+ObjectContextRef ReplicatedPG::create_object_context(const object_info_t& oi,
+ SnapSetContext *ssc)
{
- ObjectContext *obc = new ObjectContext(oi, false, ssc);
- dout(10) << "create_object_context " << obc << " " << oi.soid << " " << obc->ref << dendl;
- register_object_context(obc);
+ ObjectContextRef obc(object_contexts.lookup_or_create(oi.soid));
+ assert(obc->destructor_callback == NULL);
+ obc->destructor_callback = new C_PG_ObjectContext(this, obc.get());
+ obc->obs.oi = oi;
+ obc->obs.exists = false;
+ obc->ssc = ssc;
+ if (ssc)
+ register_snapset_context(ssc);
+ dout(10) << "create_object_context " << (void*)obc.get() << " " << oi.soid << " " << dendl;
populate_obc_watchers(obc);
- obc->ref++;
return obc;
}
-ObjectContext *ReplicatedPG::get_object_context(const hobject_t& soid,
- bool can_create)
+ObjectContextRef ReplicatedPG::get_object_context(const hobject_t& soid,
+ bool can_create)
{
- map<hobject_t, ObjectContext*>::iterator p = object_contexts.find(soid);
- ObjectContext *obc;
- if (p != object_contexts.end()) {
- obc = p->second;
- dout(10) << "get_object_context " << obc << " " << soid << " " << obc->ref
- << " -> " << (obc->ref+1) << dendl;
+ ObjectContextRef obc = object_contexts.lookup(soid);
+ if (obc) {
+ dout(10) << "get_object_context " << obc << " " << soid << dendl;
} else {
// check disk
bufferlist bv;
int r = osd->store->getattr(coll, soid, OI_ATTR, bv);
if (r < 0) {
if (!can_create)
- return NULL; // -ENOENT!
+ return ObjectContextRef(); // -ENOENT!
// new object.
object_info_t oi(soid);
@@ -4562,49 +4533,41 @@ ObjectContext *ReplicatedPG::get_object_context(const hobject_t& soid,
assert(oi.soid.pool == (int64_t)info.pgid.pool());
- SnapSetContext *ssc = NULL;
- if (can_create)
- ssc = get_snapset_context(soid.oid, soid.get_key(), soid.hash, true, soid.get_namespace());
- obc = new ObjectContext(oi, true, ssc);
+ obc = object_contexts.lookup_or_create(oi.soid);
+ obc->destructor_callback = new C_PG_ObjectContext(this, obc.get());
+ obc->obs.oi = oi;
obc->obs.exists = true;
- register_object_context(obc);
-
- if (can_create && !obc->ssc)
+ if (can_create) {
obc->ssc = get_snapset_context(soid.oid, soid.get_key(), soid.hash, true, soid.get_namespace());
+ register_snapset_context(obc->ssc);
+ }
populate_obc_watchers(obc);
dout(10) << "get_object_context " << obc << " " << soid << " 0 -> 1 read " << obc->obs.oi << dendl;
}
- obc->ref++;
return obc;
}
void ReplicatedPG::context_registry_on_change()
{
- list<ObjectContext *> contexts;
- for (map<hobject_t, ObjectContext*>::iterator i = object_contexts.begin();
- i != object_contexts.end();
- ++i) {
- i->second->get();
- contexts.push_back(i->second);
- for (map<pair<uint64_t, entity_name_t>, WatchRef>::iterator j =
- i->second->watchers.begin();
- j != i->second->watchers.end();
- i->second->watchers.erase(j++)) {
- j->second->discard();
+ pair<hobject_t, ObjectContextRef> i;
+ while (object_contexts.get_next(i.first, &i)) {
+ ObjectContextRef obc(i.second);
+ if (obc) {
+ for (map<pair<uint64_t, entity_name_t>, WatchRef>::iterator j =
+ obc->watchers.begin();
+ j != obc->watchers.end();
+ obc->watchers.erase(j++)) {
+ j->second->discard();
+ }
}
}
- for (list<ObjectContext *>::iterator i = contexts.begin();
- i != contexts.end();
- contexts.erase(i++)) {
- put_object_context(*i);
- }
}
int ReplicatedPG::find_object_context(const hobject_t& oid,
- ObjectContext **pobc,
+ ObjectContextRef *pobc,
bool can_create,
snapid_t *psnapid)
{
@@ -4616,11 +4579,10 @@ int ReplicatedPG::find_object_context(const hobject_t& oid,
// want the snapdir?
if (oid.snap == CEPH_SNAPDIR) {
// return head or snapdir, whichever exists.
- ObjectContext *obc = get_object_context(head, can_create);
+ ObjectContextRef obc = get_object_context(head, can_create);
if (obc && !obc->obs.exists) {
// ignore it if the obc exists but the object doesn't
- put_object_context(obc);
- obc = NULL;
+ obc = ObjectContextRef();
}
if (!obc) {
obc = get_object_context(snapdir, can_create);
@@ -4638,7 +4600,7 @@ int ReplicatedPG::find_object_context(const hobject_t& oid,
// want the head?
if (oid.snap == CEPH_NOSNAP) {
- ObjectContext *obc = get_object_context(head, can_create);
+ ObjectContextRef obc = get_object_context(head, can_create);
if (!obc)
return -ENOENT;
dout(10) << "find_object_context " << oid << " @" << oid.snap << dendl;
@@ -4661,7 +4623,7 @@ int ReplicatedPG::find_object_context(const hobject_t& oid,
// head?
if (oid.snap > ssc->snapset.seq) {
if (ssc->snapset.head_exists) {
- ObjectContext *obc = get_object_context(head, false);
+ ObjectContextRef obc = get_object_context(head, false);
dout(10) << "find_object_context " << head
<< " want " << oid.snap << " > snapset seq " << ssc->snapset.seq
<< " -- HIT " << obc->obs
@@ -4706,7 +4668,7 @@ int ReplicatedPG::find_object_context(const hobject_t& oid,
return -EAGAIN;
}
- ObjectContext *obc = get_object_context(soid, false);
+ ObjectContextRef obc = get_object_context(soid, false);
assert(obc);
// clone
@@ -4721,41 +4683,20 @@ int ReplicatedPG::find_object_context(const hobject_t& oid,
} else {
dout(20) << "find_object_context " << soid << " [" << first << "," << last
<< "] does not contain " << oid.snap << " -- DNE" << dendl;
- put_object_context(obc);
return -ENOENT;
}
}
-void ReplicatedPG::put_object_context(ObjectContext *obc)
+void ReplicatedPG::object_context_destructor_callback(ObjectContext *obc)
{
- dout(10) << "put_object_context " << obc << " " << obc->obs.oi.soid << " "
- << obc->ref << " -> " << (obc->ref-1) << dendl;
-
- --obc->ref;
- if (obc->ref == 0) {
- if (obc->ssc)
- put_snapset_context(obc->ssc);
-
- if (obc->registered)
- object_contexts.erase(obc->obs.oi.soid);
- delete obc;
-
- if (object_contexts.empty())
- kick();
- }
-}
+ dout(10) << "object_context_destructor_callback " << obc << " "
+ << obc->obs.oi.soid << dendl;
-void ReplicatedPG::put_object_contexts(map<hobject_t,ObjectContext*>& obcv)
-{
- if (obcv.empty())
- return;
- dout(10) << "put_object_contexts " << obcv << dendl;
- for (map<hobject_t,ObjectContext*>::iterator p = obcv.begin(); p != obcv.end(); ++p)
- put_object_context(p->second);
- obcv.clear();
+ if (obc->ssc)
+ put_snapset_context(obc->ssc);
}
-void ReplicatedPG::add_object_context_to_pg_stat(ObjectContext *obc, pg_stat_t *pgstat)
+void ReplicatedPG::add_object_context_to_pg_stat(ObjectContextRef obc, pg_stat_t *pgstat)
{
object_info_t& oi = obc->obs.oi;
@@ -4797,9 +4738,10 @@ void ReplicatedPG::add_object_context_to_pg_stat(ObjectContext *obc, pg_stat_t *
SnapSetContext *ReplicatedPG::create_snapset_context(const object_t& oid)
{
+ Mutex::Locker l(snapset_contexts_lock);
SnapSetContext *ssc = new SnapSetContext(oid);
dout(10) << "create_snapset_context " << ssc << " " << ssc->oid << dendl;
- register_snapset_context(ssc);
+ _register_snapset_context(ssc);
ssc->ref++;
return ssc;
}
@@ -4810,6 +4752,7 @@ SnapSetContext *ReplicatedPG::get_snapset_context(const object_t& oid,
bool can_create,
const string& nspace)
{
+ Mutex::Locker l(snapset_contexts_lock);
SnapSetContext *ssc;
map<object_t, SnapSetContext*>::iterator p = snapset_contexts.find(oid);
if (p != snapset_contexts.end()) {
@@ -4828,7 +4771,7 @@ SnapSetContext *ReplicatedPG::get_snapset_context(const object_t& oid,
return NULL;
}
ssc = new SnapSetContext(oid);
- register_snapset_context(ssc);
+ _register_snapset_context(ssc);
if (r >= 0) {
bufferlist::iterator bvp = bv.begin();
ssc->snapset.decode(bvp);
@@ -4843,9 +4786,9 @@ SnapSetContext *ReplicatedPG::get_snapset_context(const object_t& oid,
void ReplicatedPG::put_snapset_context(SnapSetContext *ssc)
{
+ Mutex::Locker l(snapset_contexts_lock);
dout(10) << "put_snapset_context " << ssc->oid << " "
<< ssc->ref << " -> " << (ssc->ref-1) << dendl;
-
--ssc->ref;
if (ssc->ref == 0) {
if (ssc->registered)
@@ -5098,7 +5041,7 @@ void ReplicatedPG::sub_op_modify_reply(OpRequestRef op)
// ===========================================================
-void ReplicatedPG::calc_head_subsets(ObjectContext *obc, SnapSet& snapset, const hobject_t& head,
+void ReplicatedPG::calc_head_subsets(ObjectContextRef obc, SnapSet& snapset, const hobject_t& head,
pg_missing_t& missing,
const hobject_t &last_backfill,
interval_set<uint64_t>& data_subset,
@@ -5383,7 +5326,7 @@ void ReplicatedPG::send_remove_op(const hobject_t& oid, eversion_t v, int peer)
* clones/heads and dup data ranges where possible.
*/
void ReplicatedPG::prep_push_to_replica(
- ObjectContext *obc, const hobject_t& soid, int peer,
+ ObjectContextRef obc, const hobject_t& soid, int peer,
int prio,
PushOp *pop)
{
@@ -5437,7 +5380,7 @@ void ReplicatedPG::prep_push_to_replica(
}
void ReplicatedPG::prep_push(int prio,
- ObjectContext *obc,
+ ObjectContextRef obc,
const hobject_t& soid, int peer,
PushOp *pop)
{
@@ -5453,7 +5396,7 @@ void ReplicatedPG::prep_push(int prio,
void ReplicatedPG::prep_push(
int prio,
- ObjectContext *obc,
+ ObjectContextRef obc,
const hobject_t& soid, int peer,
eversion_t version,
interval_set<uint64_t> &data_subset,
@@ -5744,7 +5687,7 @@ bool ReplicatedPG::handle_pull_response(
hoid.get_namespace());
assert(ssc);
}
- ObjectContext *obc = create_object_context(pi.recovery_info.oi, ssc);
+ ObjectContextRef obc = create_object_context(pi.recovery_info.oi, ssc);
obc->obs.exists = true;
obc->ondisk_write_lock();
@@ -6142,18 +6085,14 @@ bool ReplicatedPG::handle_push_reply(int peer, PushReplyOp &op, PushOp *reply)
void ReplicatedPG::finish_degraded_object(const hobject_t& oid)
{
dout(10) << "finish_degraded_object " << oid << dendl;
- map<hobject_t, ObjectContext *>::iterator i = object_contexts.find(oid);
- if (i != object_contexts.end()) {
- i->second->get();
- for (set<ObjectContext*>::iterator j = i->second->blocking.begin();
- j != i->second->blocking.end();
- i->second->blocking.erase(j++)) {
+ ObjectContextRef obc(object_contexts.lookup(oid));
+ if (obc) {
+ for (set<ObjectContextRef>::iterator j = obc->blocking.begin();
+ j != obc->blocking.end();
+ obc->blocking.erase(j++)) {
dout(10) << " no longer blocking writes for " << (*j)->obs.oi.soid << dendl;
- (*j)->blocked_by = NULL;
- put_object_context(*j);
- put_object_context(i->second);
+ (*j)->blocked_by = ObjectContextRef();
}
- put_object_context(i->second);
}
if (callbacks_for_degraded_object.count(oid)) {
list<Context*> contexts;
@@ -6258,11 +6197,10 @@ void ReplicatedPG::_committed_pushed_object(
unlock();
}
-void ReplicatedPG::_applied_recovered_object(ObjectContext *obc)
+void ReplicatedPG::_applied_recovered_object(ObjectContextRef obc)
{
lock();
dout(10) << "_applied_recovered_object " << *obc << dendl;
- put_object_context(obc);
assert(active_pushes >= 1);
--active_pushes;
@@ -6468,7 +6406,7 @@ eversion_t ReplicatedPG::pick_newest_available(const hobject_t& oid)
/* Mark an object as lost
*/
-ObjectContext *ReplicatedPG::mark_object_lost(ObjectStore::Transaction *t,
+ObjectContextRef ReplicatedPG::mark_object_lost(ObjectStore::Transaction *t,
const hobject_t &oid, eversion_t version,
utime_t mtime, int what)
{
@@ -6485,7 +6423,7 @@ ObjectContext *ReplicatedPG::mark_object_lost(ObjectStore::Transaction *t,
pg_log_entry_t e(what, oid, info.last_update, version, osd_reqid_t(), mtime);
pg_log.add(e);
- ObjectContext *obc = get_object_context(oid, true);
+ ObjectContextRef obc = get_object_context(oid, true);
obc->ondisk_write_lock();
@@ -6502,7 +6440,7 @@ ObjectContext *ReplicatedPG::mark_object_lost(ObjectStore::Transaction *t,
struct C_PG_MarkUnfoundLost : public Context {
ReplicatedPGRef pg;
- list<ObjectContext*> obcs;
+ list<ObjectContextRef> obcs;
C_PG_MarkUnfoundLost(ReplicatedPG *p) : pg(p) {}
void finish(int r) {
pg->_finish_mark_all_unfound_lost(obcs);
@@ -6535,7 +6473,7 @@ void ReplicatedPG::mark_all_unfound_lost(int what)
continue;
}
- ObjectContext *obc = NULL;
+ ObjectContextRef obc;
eversion_t prev;
switch (what) {
@@ -6594,13 +6532,16 @@ void ReplicatedPG::mark_all_unfound_lost(int what)
pg_log.get_log().print(*_dout);
*_dout << dendl;
+ info.stats.stats_invalid = true;
+
if (missing.num_missing() == 0) {
// advance last_complete since nothing else is missing!
info.last_complete = info.last_update;
- dirty_info = true;
- write_if_dirty(*t);
}
+ dirty_info = true;
+ write_if_dirty(*t);
+
osd->store->queue_transaction(osr.get(), t, c, NULL, new C_OSD_OndiskWriteUnlockList(&c->obcs));
// Send out the PG log to all replicas
@@ -6611,7 +6552,7 @@ void ReplicatedPG::mark_all_unfound_lost(int what)
osd->queue_for_recovery(this);
}
-void ReplicatedPG::_finish_mark_all_unfound_lost(list<ObjectContext*>& obcs)
+void ReplicatedPG::_finish_mark_all_unfound_lost(list<ObjectContextRef>& obcs)
{
lock();
dout(10) << "_finish_mark_all_unfound_lost " << dendl;
@@ -6620,11 +6561,7 @@ void ReplicatedPG::_finish_mark_all_unfound_lost(list<ObjectContext*>& obcs)
requeue_ops(waiting_for_all_missing);
waiting_for_all_missing.clear();
- while (!obcs.empty()) {
- ObjectContext *obc = obcs.front();
- put_object_context(obc);
- obcs.pop_front();
- }
+ obcs.clear();
unlock();
}
@@ -7085,7 +7022,7 @@ int ReplicatedPG::recover_primary(int max, ThreadPool::TPHandle &handle)
case pg_log_entry_t::LOST_REVERT:
{
if (item.have == latest->reverting_to) {
- ObjectContext *obc = get_object_context(soid, true);
+ ObjectContextRef obc = get_object_context(soid, true);
if (obc->obs.oi.version == latest->version) {
// I'm already reverting
@@ -7185,7 +7122,7 @@ int ReplicatedPG::prep_object_replica_pushes(
dout(10) << __func__ << ": on " << soid << dendl;
// NOTE: we know we will get a valid oloc off of disk here.
- ObjectContext *obc = get_object_context(soid, false);
+ ObjectContextRef obc = get_object_context(soid, false);
if (!obc) {
pg_log.missing_add(soid, v, eversion_t());
bool uhoh = true;
@@ -7229,7 +7166,6 @@ int ReplicatedPG::prep_object_replica_pushes(
dout(10) << " ondisk_read_unlock on " << soid << dendl;
obc->ondisk_read_unlock();
- put_object_context(obc);
return 1;
}
@@ -7432,11 +7368,10 @@ int ReplicatedPG::recover_backfill(
for (set<hobject_t>::iterator i = add_to_stat.begin();
i != add_to_stat.end();
++i) {
- ObjectContext *obc = get_object_context(*i, false);
+ ObjectContextRef obc = get_object_context(*i, false);
pg_stat_t stat;
add_object_context_to_pg_stat(obc, &stat);
pending_backfill_updates[*i] = stat;
- put_object_context(obc);
}
for (map<hobject_t, eversion_t>::iterator i = to_remove.begin();
i != to_remove.end();
@@ -7507,13 +7442,12 @@ void ReplicatedPG::prep_backfill_object_push(
if (!pushing.count(oid))
start_recovery_op(oid);
- ObjectContext *obc = get_object_context(oid, false);
+ ObjectContextRef obc = get_object_context(oid, false);
obc->ondisk_read_lock();
(*pushes)[peer].push_back(PushOp());
prep_push_to_replica(obc, oid, peer, g_conf->osd_recovery_op_priority,
&((*pushes)[peer].back()));
obc->ondisk_read_unlock();
- put_object_context(obc);
}
void ReplicatedPG::scan_range(
@@ -7535,9 +7469,9 @@ void ReplicatedPG::scan_range(
for (vector<hobject_t>::iterator p = ls.begin(); p != ls.end(); ++p) {
handle.reset_tp_timeout();
- ObjectContext *obc = NULL;
+ ObjectContextRef obc;
if (is_primary())
- obc = _lookup_object_context(*p);
+ obc = object_contexts.lookup(*p);
if (obc) {
bi->objects[*p] = obc->obs.oi.version;
dout(20) << " " << *p << " " << obc->obs.oi.version << dendl;
diff --git a/src/osd/ReplicatedPG.h b/src/osd/ReplicatedPG.h
index af14871fe27..0fbe5afd9ca 100644
--- a/src/osd/ReplicatedPG.h
+++ b/src/osd/ReplicatedPG.h
@@ -3,6 +3,9 @@
* Ceph - scalable distributed file system
*
* Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
+ * Copyright (C) 2013 Cloudwatt <libre.licensing@cloudwatt.com>
+ *
+ * Author: Loic Dachary <loic@dachary.org>
*
* This is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
@@ -27,6 +30,9 @@
#include "messages/MOSDOp.h"
#include "messages/MOSDOpReply.h"
#include "messages/MOSDSubOp.h"
+
+#include "common/sharedptr_registry.hpp"
+
class MOSDSubOpReply;
class ReplicatedPG;
@@ -124,10 +130,10 @@ public:
vector<pg_log_entry_t> log;
interval_set<uint64_t> modified_ranges;
- ObjectContext *obc; // For ref counting purposes
- map<hobject_t,ObjectContext*> src_obc;
- ObjectContext *clone_obc; // if we created a clone
- ObjectContext *snapset_obc; // if we created/deleted a snapdir
+ ObjectContextRef obc;
+ map<hobject_t,ObjectContextRef> src_obc;
+ ObjectContextRef clone_obc; // if we created a clone
+ ObjectContextRef snapset_obc; // if we created/deleted a snapdir
int data_off; // FIXME: we may want to kill this msgr hint off at some point!
@@ -150,7 +156,7 @@ public:
modify(false), user_modify(false),
bytes_written(0), bytes_read(0),
current_osd_subop_num(0),
- obc(0), clone_obc(0), snapset_obc(0), data_off(0), reply(NULL), pg(_pg),
+ data_off(0), reply(NULL), pg(_pg),
num_read(0),
num_write(0) {
if (_ssc) {
@@ -176,8 +182,8 @@ public:
eversion_t v;
OpContext *ctx;
- ObjectContext *obc;
- map<hobject_t,ObjectContext*> src_obc;
+ ObjectContextRef obc;
+ map<hobject_t,ObjectContextRef> src_obc;
tid_t rep_tid;
@@ -197,7 +203,7 @@ public:
list<ObjectStore::Transaction*> tls;
bool queue_snap_trimmer;
- RepGather(OpContext *c, ObjectContext *pi, tid_t rt,
+ RepGather(OpContext *c, ObjectContextRef pi, tid_t rt,
eversion_t lc) :
queue_item(this),
nref(1),
@@ -240,7 +246,7 @@ protected:
void eval_repop(RepGather*);
void issue_repop(RepGather *repop, utime_t now,
eversion_t old_last_update, bool old_exists, uint64_t old_size, eversion_t old_version);
- RepGather *new_repop(OpContext *ctx, ObjectContext *obc, tid_t rep_tid);
+ RepGather *new_repop(OpContext *ctx, ObjectContextRef obc, tid_t rep_tid);
void remove_repop(RepGather *repop);
void repop_ack(RepGather *repop,
int result, int ack_type,
@@ -276,50 +282,42 @@ protected:
friend struct C_OnPushCommit;
// projected object info
- map<hobject_t, ObjectContext*> object_contexts;
+ SharedPtrRegistry<hobject_t, ObjectContext> object_contexts;
map<object_t, SnapSetContext*> snapset_contexts;
+ Mutex snapset_contexts_lock;
// debug order that client ops are applied
map<hobject_t, map<client_t, tid_t> > debug_op_order;
- void populate_obc_watchers(ObjectContext *obc);
- void check_blacklisted_obc_watchers(ObjectContext *);
+ void populate_obc_watchers(ObjectContextRef obc);
+ void check_blacklisted_obc_watchers(ObjectContextRef obc);
void check_blacklisted_watchers();
void get_watchers(list<obj_watch_item_t> &pg_watchers);
- void get_obc_watchers(ObjectContext *obc, list<obj_watch_item_t> &pg_watchers);
+ void get_obc_watchers(ObjectContextRef obc, list<obj_watch_item_t> &pg_watchers);
public:
void handle_watch_timeout(WatchRef watch);
protected:
- ObjectContext *lookup_object_context(const hobject_t& soid) {
- if (object_contexts.count(soid)) {
- ObjectContext *obc = object_contexts[soid];
- obc->ref++;
- return obc;
- }
- return NULL;
- }
- ObjectContext *_lookup_object_context(const hobject_t& oid);
- ObjectContext *create_object_context(const object_info_t& oi, SnapSetContext *ssc);
- ObjectContext *get_object_context(const hobject_t& soid, bool can_create);
- void register_object_context(ObjectContext *obc) {
- if (!obc->registered) {
- assert(object_contexts.count(obc->obs.oi.soid) == 0);
- obc->registered = true;
- object_contexts[obc->obs.oi.soid] = obc;
- }
- if (obc->ssc)
- register_snapset_context(obc->ssc);
- }
+ ObjectContextRef create_object_context(const object_info_t& oi, SnapSetContext *ssc);
+ ObjectContextRef get_object_context(const hobject_t& soid, bool can_create);
void context_registry_on_change();
- void put_object_context(ObjectContext *obc);
- void put_object_contexts(map<hobject_t,ObjectContext*>& obcv);
+ void object_context_destructor_callback(ObjectContext *obc);
+ struct C_PG_ObjectContext : public Context {
+ ReplicatedPGRef pg;
+ ObjectContext *obc;
+ C_PG_ObjectContext(ReplicatedPG *p, ObjectContext *o) :
+ pg(p), obc(o) {}
+ void finish(int r) {
+ pg->object_context_destructor_callback(obc);
+ }
+ };
+
int find_object_context(const hobject_t& oid,
- ObjectContext **pobc,
+ ObjectContextRef *pobc,
bool can_create, snapid_t *psnapid=NULL);
- void add_object_context_to_pg_stat(ObjectContext *obc, pg_stat_t *stat);
+ void add_object_context_to_pg_stat(ObjectContextRef obc, pg_stat_t *stat);
void get_src_oloc(const object_t& oid, const object_locator_t& oloc, object_locator_t& src_oloc);
@@ -327,6 +325,11 @@ protected:
SnapSetContext *get_snapset_context(const object_t& oid, const string &key,
ps_t seed, bool can_create, const string &nspace);
void register_snapset_context(SnapSetContext *ssc) {
+ Mutex::Locker l(snapset_contexts_lock);
+ _register_snapset_context(ssc);
+ }
+ void _register_snapset_context(SnapSetContext *ssc) {
+ assert(snapset_contexts_lock.is_locked());
if (!ssc->registered) {
assert(snapset_contexts.count(ssc->oid) == 0);
ssc->registered = true;
@@ -525,7 +528,7 @@ protected:
int prep_object_replica_pushes(const hobject_t& soid, eversion_t v,
int priority,
map<int, vector<PushOp> > *pushes);
- void calc_head_subsets(ObjectContext *obc, SnapSet& snapset, const hobject_t& head,
+ void calc_head_subsets(ObjectContextRef obc, SnapSet& snapset, const hobject_t& head,
pg_missing_t& missing,
const hobject_t &last_backfill,
interval_set<uint64_t>& data_subset,
@@ -535,17 +538,17 @@ protected:
interval_set<uint64_t>& data_subset,
map<hobject_t, interval_set<uint64_t> >& clone_subsets);
void prep_push_to_replica(
- ObjectContext *obc,
+ ObjectContextRef obc,
const hobject_t& oid,
int dest,
int priority,
PushOp *push_op);
void prep_push(int priority,
- ObjectContext *obc,
+ ObjectContextRef obc,
const hobject_t& oid, int dest,
PushOp *op);
void prep_push(int priority,
- ObjectContext *obc,
+ ObjectContextRef obc,
const hobject_t& soid, int peer,
eversion_t version,
interval_set<uint64_t> &data_subset,
@@ -648,8 +651,8 @@ protected:
}
};
struct C_OSD_OndiskWriteUnlock : public Context {
- ObjectContext *obc, *obc2;
- C_OSD_OndiskWriteUnlock(ObjectContext *o, ObjectContext *o2=0) : obc(o), obc2(o2) {}
+ ObjectContextRef obc, obc2;
+ C_OSD_OndiskWriteUnlock(ObjectContextRef o, ObjectContextRef o2 = ObjectContextRef()) : obc(o), obc2(o2) {}
void finish(int r) {
obc->ondisk_write_unlock();
if (obc2)
@@ -657,17 +660,17 @@ protected:
}
};
struct C_OSD_OndiskWriteUnlockList : public Context {
- list<ObjectContext*> *pls;
- C_OSD_OndiskWriteUnlockList(list<ObjectContext*> *l) : pls(l) {}
+ list<ObjectContextRef> *pls;
+ C_OSD_OndiskWriteUnlockList(list<ObjectContextRef> *l) : pls(l) {}
void finish(int r) {
- for (list<ObjectContext*>::iterator p = pls->begin(); p != pls->end(); ++p)
+ for (list<ObjectContextRef>::iterator p = pls->begin(); p != pls->end(); ++p)
(*p)->ondisk_write_unlock();
}
};
struct C_OSD_AppliedRecoveredObject : public Context {
ReplicatedPGRef pg;
- ObjectContext *obc;
- C_OSD_AppliedRecoveredObject(ReplicatedPG *p, ObjectContext *o) :
+ ObjectContextRef obc;
+ C_OSD_AppliedRecoveredObject(ReplicatedPG *p, ObjectContextRef o) :
pg(p), obc(o) {}
void finish(int r) {
pg->_applied_recovered_object(obc);
@@ -730,7 +733,7 @@ protected:
void sub_op_modify_commit(RepModify *rm);
void sub_op_modify_reply(OpRequestRef op);
- void _applied_recovered_object(ObjectContext *obc);
+ void _applied_recovered_object(ObjectContextRef obc);
void _applied_recovered_object_replica();
void _committed_pushed_object(epoch_t epoch, eversion_t lc);
void recover_got(hobject_t oid, eversion_t v);
@@ -879,10 +882,10 @@ public:
void mark_all_unfound_lost(int what);
eversion_t pick_newest_available(const hobject_t& oid);
- ObjectContext *mark_object_lost(ObjectStore::Transaction *t,
+ ObjectContextRef mark_object_lost(ObjectStore::Transaction *t,
const hobject_t& oid, eversion_t version,
utime_t mtime, int what);
- void _finish_mark_all_unfound_lost(list<ObjectContext*>& obcs);
+ void _finish_mark_all_unfound_lost(list<ObjectContextRef>& obcs);
void on_role_change();
void on_change(ObjectStore::Transaction *t);
diff --git a/src/osd/Watch.cc b/src/osd/Watch.cc
index 8a084ca9aa1..ffa3adced24 100644
--- a/src/osd/Watch.cc
+++ b/src/osd/Watch.cc
@@ -250,15 +250,14 @@ public:
string Watch::gen_dbg_prefix() {
stringstream ss;
ss << pg->gen_prefix() << " -- Watch("
- << make_pair(cookie, entity)
- << ", obc->ref=" << (obc ? obc->ref : -1) << ") ";
+ << make_pair(cookie, entity) << ") ";
return ss.str();
}
Watch::Watch(
ReplicatedPG *pg,
OSDService *osd,
- ObjectContext *obc,
+ ObjectContextRef obc,
uint32_t timeout,
uint64_t cookie,
entity_name_t entity,
@@ -272,7 +271,6 @@ Watch::Watch(
addr(addr),
entity(entity),
discarded(false) {
- obc->get();
dout(10) << "Watch()" << dendl;
}
@@ -292,13 +290,6 @@ Context *Watch::get_delayed_cb()
return cb;
}
-ObjectContext *Watch::get_obc()
-{
- assert(obc);
- obc->get();
- return obc;
-}
-
void Watch::register_cb()
{
Mutex::Locker l(osd->watch_lock);
@@ -370,8 +361,7 @@ void Watch::discard_state()
sessionref->put();
conn = ConnectionRef();
}
- pg->put_object_context(obc);
- obc = NULL;
+ obc = ObjectContextRef();
}
bool Watch::is_discarded()
@@ -428,7 +418,7 @@ void Watch::notify_ack(uint64_t notify_id)
WatchRef Watch::makeWatchRef(
ReplicatedPG *pg, OSDService *osd,
- ObjectContext *obc, uint32_t timeout, uint64_t cookie, entity_name_t entity, entity_addr_t addr)
+ ObjectContextRef obc, uint32_t timeout, uint64_t cookie, entity_name_t entity, entity_addr_t addr)
{
WatchRef ret(new Watch(pg, osd, obc, timeout, cookie, entity, addr));
ret->set_self(ret);
diff --git a/src/osd/Watch.h b/src/osd/Watch.h
index 1c9fa28cb65..ecb61ad8b72 100644
--- a/src/osd/Watch.h
+++ b/src/osd/Watch.h
@@ -151,7 +151,7 @@ class Watch {
OSDService *osd;
boost::intrusive_ptr<ReplicatedPG> pg;
- ObjectContext *obc;
+ std::tr1::shared_ptr<ObjectContext> obc;
std::map<uint64_t, NotifyRef> in_progress_notifies;
@@ -165,7 +165,7 @@ class Watch {
Watch(
ReplicatedPG *pg, OSDService *osd,
- ObjectContext *obc, uint32_t timeout,
+ std::tr1::shared_ptr<ObjectContext> obc, uint32_t timeout,
uint64_t cookie, entity_name_t entity,
entity_addr_t addr);
@@ -187,7 +187,7 @@ public:
string gen_dbg_prefix();
static WatchRef makeWatchRef(
ReplicatedPG *pg, OSDService *osd,
- ObjectContext *obc, uint32_t timeout, uint64_t cookie, entity_name_t entity, entity_addr_t addr);
+ std::tr1::shared_ptr<ObjectContext> obc, uint32_t timeout, uint64_t cookie, entity_name_t entity, entity_addr_t addr);
void set_self(WatchRef _self) {
self = _self;
}
@@ -195,8 +195,8 @@ public:
/// Does not grant a ref count!
boost::intrusive_ptr<ReplicatedPG> get_pg() { return pg; }
- /// Grants a ref count!
- ObjectContext *get_obc();
+ std::tr1::shared_ptr<ObjectContext> get_obc() { return obc; }
+
uint64_t get_cookie() const { return cookie; }
entity_name_t get_entity() const { return entity; }
entity_addr_t get_peer_addr() const { return addr; }
diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h
index 3cafdc2b035..6cdacc9902c 100644
--- a/src/osd/osd_types.h
+++ b/src/osd/osd_types.h
@@ -2001,6 +2001,8 @@ struct ObjectState {
object_info_t oi;
bool exists;
+ ObjectState() : exists(false) {}
+
ObjectState(const object_info_t &oi_, bool exists_)
: oi(oi_), exists(exists_) {}
};
@@ -2022,13 +2024,18 @@ struct SnapSetContext {
* etc., because we don't send writes down to disk until after
* replicas ack.
*/
+
+struct ObjectContext;
+
+typedef std::tr1::shared_ptr<ObjectContext> ObjectContextRef;
+
struct ObjectContext {
- int ref;
- bool registered;
ObjectState obs;
SnapSetContext *ssc; // may be null
+ Context *destructor_callback;
+
private:
Mutex lock;
public:
@@ -2036,20 +2043,22 @@ public:
int unstable_writes, readers, writers_waiting, readers_waiting;
// set if writes for this object are blocked on another objects recovery
- ObjectContext *blocked_by; // object blocking our writes
- set<ObjectContext*> blocking; // objects whose writes we block
+ ObjectContextRef blocked_by; // object blocking our writes
+ set<ObjectContextRef> blocking; // objects whose writes we block
// any entity in obs.oi.watchers MUST be in either watchers or unconnected_watchers.
map<pair<uint64_t, entity_name_t>, WatchRef> watchers;
- ObjectContext(const object_info_t &oi_, bool exists_, SnapSetContext *ssc_)
- : ref(0), registered(false), obs(oi_, exists_), ssc(ssc_),
+ ObjectContext()
+ : ssc(NULL),
+ destructor_callback(0),
lock("ReplicatedPG::ObjectContext::lock"),
- unstable_writes(0), readers(0), writers_waiting(0), readers_waiting(0),
- blocked_by(0) {}
-
- void get() { ++ref; }
+ unstable_writes(0), readers(0), writers_waiting(0), readers_waiting(0) {}
+ ~ObjectContext() {
+ if (destructor_callback)
+ destructor_callback->complete(0);
+ }
// do simple synchronous mutual exclusion, for now. now waitqueues or anything fancy.
void ondisk_write_lock() {
lock.Lock();
diff --git a/src/osdc/Objecter.cc b/src/osdc/Objecter.cc
index eb490cef330..f94dc7baf6c 100644
--- a/src/osdc/Objecter.cc
+++ b/src/osdc/Objecter.cc
@@ -625,10 +625,10 @@ void Objecter::handle_osd_map(MOSDMap *m)
// was/is paused?
if (was_pauserd || was_pausewr || pauserd || pausewr)
maybe_request_map();
-
+
// unpause requests?
if ((was_pauserd && !pauserd) ||
- (was_pausewr && !pausewr))
+ (was_pausewr && !pausewr)) {
for (map<tid_t,Op*>::iterator p = ops.begin();
p != ops.end();
++p) {
@@ -638,6 +638,16 @@ void Objecter::handle_osd_map(MOSDMap *m)
!((op->flags & CEPH_OSD_FLAG_WRITE) && pausewr)) // not still paused as a write
need_resend[op->tid] = op;
}
+ for (map<tid_t, LingerOp*>::iterator lp = linger_ops.begin();
+ lp != linger_ops.end();
+ ++lp) {
+ LingerOp *op = lp->second;
+ if (!op->registered &&
+ !pauserd && // not still paused as a read
+ !((op->flags & CEPH_OSD_FLAG_WRITE) && pausewr)) // not still paused as a write
+ need_resend_linger.push_back(op);
+ }
+ }
// resend requests
for (map<tid_t, Op*>::iterator p = need_resend.begin(); p != need_resend.end(); ++p) {
@@ -2231,8 +2241,7 @@ void Objecter::dump_linger_ops(Formatter *fmt) const
fmt->dump_stream("object_id") << op->oid;
fmt->dump_stream("object_locator") << op->oloc;
fmt->dump_stream("snapid") << op->snap;
- fmt->dump_stream("registering") << op->snap;
- fmt->dump_stream("registered") << op->snap;
+ fmt->dump_stream("registered") << op->registered;
fmt->close_section(); // linger_op object
}
fmt->close_section(); // linger_ops array
diff --git a/src/rgw/rgw_admin.cc b/src/rgw/rgw_admin.cc
index 2b8a716115b..644a1760aaf 100644
--- a/src/rgw/rgw_admin.cc
+++ b/src/rgw/rgw_admin.cc
@@ -114,6 +114,7 @@ void _usage()
cerr << " --access=<access> Set access permissions for sub-user, should be one\n";
cerr << " of read, write, readwrite, full\n";
cerr << " --display-name=<name>\n";
+ cerr << " --system set the system flag on the user\n";
cerr << " --bucket=<bucket>\n";
cerr << " --pool=<pool>\n";
cerr << " --object=<object>\n";
@@ -853,6 +854,9 @@ int main(int argc, char **argv)
cerr << "ERROR: invalid replica log type" << std::endl;
return EINVAL;
}
+ } else if (strncmp(*i, "-", 1) == 0) {
+ cerr << "ERROR: invalid flag " << *i << std::endl;
+ return EINVAL;
} else {
++i;
}
diff --git a/src/rgw/rgw_cache.h b/src/rgw/rgw_cache.h
index b6c4e15eede..601fcdfc963 100644
--- a/src/rgw/rgw_cache.h
+++ b/src/rgw/rgw_cache.h
@@ -177,14 +177,13 @@ class RGWCache : public T
if (ret < 0)
return ret;
- ret = T::init_watch();
- return ret;
+ return 0;
}
- void finalize() {
- T::finalize_watch();
- T::finalize();
+ bool need_watch_notify() {
+ return true;
}
+
int distribute_cache(const string& normal_name, rgw_obj& obj, ObjectCacheInfo& obj_info, int op);
int watch_cb(int opcode, uint64_t ver, bufferlist& bl);
public:
diff --git a/src/rgw/rgw_rados.cc b/src/rgw/rgw_rados.cc
index 222b79a7d2e..03cc1ebfdb3 100644
--- a/src/rgw/rgw_rados.cc
+++ b/src/rgw/rgw_rados.cc
@@ -817,6 +817,9 @@ void RGWRadosCtx::set_prefetch_data(rgw_obj& obj) {
void RGWRados::finalize()
{
+ if (need_watch_notify()) {
+ finalize_watch();
+ }
delete meta_mgr;
delete data_log;
if (use_gc_thread) {
@@ -872,6 +875,14 @@ int RGWRados::init_complete()
{
int ret;
+ if (need_watch_notify()) {
+ ret = init_watch();
+ if (ret < 0) {
+ lderr(cct) << "ERROR: failed to initialize watch" << dendl;
+ return ret;
+ }
+ }
+
ret = region.init(cct, this);
if (ret < 0)
return ret;
diff --git a/src/rgw/rgw_rados.h b/src/rgw/rgw_rados.h
index d01f76ec224..e6ab244afa9 100644
--- a/src/rgw/rgw_rados.h
+++ b/src/rgw/rgw_rados.h
@@ -1254,6 +1254,7 @@ public:
virtual int update_containers_stats(map<string, RGWBucketEnt>& m);
virtual int append_async(rgw_obj& obj, size_t size, bufferlist& bl);
+ virtual bool need_watch_notify() { return false; }
virtual int init_watch();
virtual void finalize_watch();
virtual int distribute(const string& key, bufferlist& bl);
diff --git a/src/test/cli/radosgw-admin/help.t b/src/test/cli/radosgw-admin/help.t
index 21f51e68c35..90f6beca133 100644
--- a/src/test/cli/radosgw-admin/help.t
+++ b/src/test/cli/radosgw-admin/help.t
@@ -75,6 +75,7 @@
--access=<access> Set access permissions for sub-user, should be one
of read, write, readwrite, full
--display-name=<name>
+ --system set the system flag on the user
--bucket=<bucket>
--pool=<pool>
--object=<object>
diff --git a/src/test/mon/moncap.cc b/src/test/mon/moncap.cc
index 19f82f55ecf..238442b90d8 100644
--- a/src/test/mon/moncap.cc
+++ b/src/test/mon/moncap.cc
@@ -51,6 +51,7 @@ const char *parse_good[] = {
"allow service foo-foo r, allow service bar r",
"allow service \" foo \" w, allow service bar r",
"allow command abc with arg=foo arg2=bar, allow service foo r",
+ "allow command abc.def with arg=foo arg2=bar, allow service foo r",
"allow command \"foo bar\" with arg=\"baz\"",
"allow command \"foo bar\" with arg=\"baz.xx\"",
0
diff --git a/src/test/osd/osdcap.cc b/src/test/osd/osdcap.cc
index 5f7c607deec..8fc3ddd812a 100644
--- a/src/test/osd/osdcap.cc
+++ b/src/test/osd/osdcap.cc
@@ -41,6 +41,7 @@ const char *parse_good[] = {
"allow rwx pool foo; allow r pool bar",
"allow auid 123 rwx",
"allow pool foo rwx, allow pool bar r",
+ "allow pool foo.froo.foo rwx, allow pool bar r",
"allow pool foo rwx ; allow pool bar r",
"allow pool foo rwx ;allow pool bar r",
"allow pool foo rwx; allow pool bar r",
diff --git a/src/test/test_osd_types.cc b/src/test/test_osd_types.cc
index 730a8ffdc5d..e07c9e06592 100644
--- a/src/test/test_osd_types.cc
+++ b/src/test/test_osd_types.cc
@@ -1008,8 +1008,7 @@ protected:
TEST_F(ObjectContextTest, read_write_lock)
{
{
- object_info_t oi;
- ObjectContext obc(oi, false, NULL);
+ ObjectContext obc;
//
// write_lock
@@ -1044,8 +1043,7 @@ TEST_F(ObjectContextTest, read_write_lock)
useconds_t delay = 0;
{
- object_info_t oi;
- ObjectContext obc(oi, false, NULL);
+ ObjectContext obc;
//
// write_lock
@@ -1102,8 +1100,7 @@ TEST_F(ObjectContextTest, read_write_lock)
}
{
- object_info_t oi;
- ObjectContext obc(oi, false, NULL);
+ ObjectContext obc;
//
// read_lock
diff --git a/src/tools/ceph-monstore-tool.cc b/src/tools/ceph-monstore-tool.cc
index 4ab8fa86465..8f294c4a4e3 100644
--- a/src/tools/ceph-monstore-tool.cc
+++ b/src/tools/ceph-monstore-tool.cc
@@ -179,7 +179,7 @@ int main(int argc, char **argv) {
int fd;
if (vm.count("out")) {
- if ((fd = open(out_path.c_str(), O_WRONLY|O_CREAT|O_TRUNC, 0666)) == -1) {
+ if ((fd = open(out_path.c_str(), O_WRONLY|O_CREAT|O_TRUNC, 0666)) < 0) {
int _err = errno;
if (_err != EISDIR) {
std::cerr << "Couldn't open " << out_path << ": " << cpp_strerror(_err) << std::endl;
@@ -214,6 +214,7 @@ int main(int argc, char **argv) {
} else if (cmd == "compact") {
st.compact();
} else if (cmd == "getmonmap") {
+ assert(fd >= 0);
if (!store_path.size()) {
std::cerr << "need mon store path" << std::endl;
std::cerr << desc << std::endl;
diff --git a/src/yasm-wrapper b/src/yasm-wrapper
new file mode 100755
index 00000000000..57d95def46e
--- /dev/null
+++ b/src/yasm-wrapper
@@ -0,0 +1,38 @@
+#!/bin/sh -e
+
+# libtool and yasm do not get along.
+# filter out any crap that libtool feeds us that yasm does not understand.
+new=""
+touch=""
+while [ -n "$*" ]; do
+ case "$1" in
+ -f )
+ shift
+ new="-f $1"
+ shift
+ ;;
+ -g* | -f* | -W* | -MD | -MP | -fPIC | -c | -D* | --param* | -O* | -I* | -m* | -pipe )
+ shift
+ ;;
+ -MT )
+ shift
+ shift
+ ;;
+ -MF )
+ shift
+ touch="$1"
+ shift
+ ;;
+ * )
+ new="$new $1"
+ shift
+ ;;
+ esac
+done
+
+echo $0: yasm $new
+yasm $new
+
+[ -n "$touch" ] && touch $touch
+
+true \ No newline at end of file