-rw-r--r--  PendingReleaseNotes | 9
-rw-r--r--  ceph.spec.in | 8
-rw-r--r--  doc/index.rst | 1
-rw-r--r--  doc/install/index.rst | 69
-rw-r--r--  doc/install/libvirt-deb.rst | 43
-rw-r--r--  doc/install/libvirt-rpm.rst | 19
-rw-r--r--  doc/install/qemu-deb.rst | 26
-rw-r--r--  doc/install/qemu-rpm.rst | 56
-rw-r--r--  doc/install/rpm.rst | 165
-rw-r--r--  doc/install/yum-priorities.rst | 20
-rw-r--r--  doc/rados/operations/add-or-rm-mons.rst | 12
-rw-r--r--  doc/rados/operations/authentication.rst | 1
-rw-r--r--  doc/rados/operations/operating.rst | 237
-rw-r--r--  doc/radosgw/config.rst | 1
-rw-r--r--  doc/rbd/libvirt.rst | 54
-rw-r--r--  doc/rbd/qemu-rbd.rst | 26
-rw-r--r--  doc/rbd/rbd-openstack.rst | 2
-rw-r--r--  doc/start/hardware-recommendations.rst (renamed from doc/install/hardware-recommendations.rst) | 0
-rw-r--r--  doc/start/index.rst | 39
-rw-r--r--  doc/start/intro.rst | 70
-rw-r--r--  doc/start/os-recommendations.rst (renamed from doc/install/os-recommendations.rst) | 33
-rw-r--r--  doc/start/quick-ceph-deploy.rst | 390
-rw-r--r--  doc/start/quick-cephfs.rst | 4
-rw-r--r--  doc/start/quick-rbd.rst | 56
-rw-r--r--  doc/start/quick-rgw.rst | 4
-rw-r--r--  doc/start/quick-start-preflight.rst | 195
-rwxr-xr-x  qa/workunits/cephtool/test.sh | 5
-rwxr-xr-x  qa/workunits/misc/mkpool_layout_vxattrs.sh | 8
-rwxr-xr-x  qa/workunits/suites/fsstress.sh | 2
-rw-r--r--  src/cls/rgw/cls_rgw_client.cc | 39
-rw-r--r--  src/cls/rgw/cls_rgw_client.h | 8
-rw-r--r--  src/common/Formatter.h | 3
-rw-r--r--  src/common/common_init.cc | 7
-rw-r--r--  src/common/config_opts.h | 8
-rw-r--r--  src/common/lru_map.h | 61
-rw-r--r--  src/global/signal_handler.cc | 4
-rw-r--r--  src/include/rados/librados.hpp | 7
-rw-r--r--  src/librados/PoolAsyncCompletionImpl.h | 5
-rw-r--r--  src/mds/MDCache.cc | 39
-rw-r--r--  src/mds/MDCache.h | 3
-rw-r--r--  src/mds/MDS.cc | 1
-rw-r--r--  src/mds/Server.cc | 1
-rw-r--r--  src/mds/flock.h | 2
-rw-r--r--  src/mds/mdstypes.cc | 5
-rw-r--r--  src/mds/mdstypes.h | 6
-rw-r--r--  src/mon/MDSMonitor.cc | 51
-rw-r--r--  src/mon/MonCommands.h | 12
-rw-r--r--  src/mon/OSDMonitor.cc | 193
-rw-r--r--  src/mon/OSDMonitor.h | 3
-rw-r--r--  src/os/FileStore.cc | 10
-rw-r--r--  src/os/GenericFileStoreBackend.cc | 2
-rw-r--r--  src/osd/OSD.cc | 15
-rw-r--r--  src/osd/OSD.h | 2
-rw-r--r--  src/osd/osd_types.cc | 3
-rw-r--r--  src/osd/osd_types.h | 22
-rw-r--r--  src/osdc/Objecter.h | 13
-rw-r--r--  src/rgw/Makefile.am | 4
-rw-r--r--  src/rgw/rgw_admin.cc | 110
-rw-r--r--  src/rgw/rgw_bucket.cc | 1
-rw-r--r--  src/rgw/rgw_common.h | 27
-rw-r--r--  src/rgw/rgw_http_errors.h | 1
-rw-r--r--  src/rgw/rgw_json_enc.cc | 20
-rw-r--r--  src/rgw/rgw_main.cc | 7
-rw-r--r--  src/rgw/rgw_metadata.cc | 2
-rw-r--r--  src/rgw/rgw_op.cc | 61
-rw-r--r--  src/rgw/rgw_op.h | 12
-rw-r--r--  src/rgw/rgw_quota.cc | 332
-rw-r--r--  src/rgw/rgw_quota.h | 74
-rw-r--r--  src/rgw/rgw_rados.cc | 72
-rw-r--r--  src/rgw/rgw_rados.h | 32
-rw-r--r--  src/rgw/rgw_user.cc | 6
-rw-r--r--  src/rgw/rgw_user.h | 13
-rw-r--r--  src/test/cli/radosgw-admin/help.t | 9
-rw-r--r--  src/test/encoding/ceph_dencoder.cc | 4
-rwxr-xr-x  src/test/filestore/run_seed_to_range.sh | 2
-rw-r--r--  src/test/librados/cmd.cc | 9
-rwxr-xr-x  src/test/pybind/test_ceph_argparse.py | 21
-rwxr-xr-x  src/vstart.sh | 3
78 files changed, 2118 insertions, 784 deletions
diff --git a/PendingReleaseNotes b/PendingReleaseNotes
index a3ec73290f3..a30cf8c6e17 100644
--- a/PendingReleaseNotes
+++ b/PendingReleaseNotes
@@ -21,3 +21,12 @@ v0.71
* Most output that used K or KB (e.g., for kilobyte) now uses a
lower-case k to match the official SI convention. Any scripts that
parse output and check for an upper-case K will need to be modified.
+
+v0.72
+~~~~~
+
+* ceph-fuse and radosgw now use the same default values for the admin
+ socket and log file paths that the other daemons (ceph-osd,
+ ceph-mon, etc.) do. If you run these daemons as non-root, you may
+ need to adjust your ceph.conf to disable these options or to adjust
+ the permissions on /var/run/ceph and /var/log/ceph.
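As a minimal sketch (the paths below are hypothetical; ``admin socket`` and
``log file`` are the standard option names, and ``$cluster``/``$name`` are the
usual ceph.conf metavariables), a non-root ``ceph-fuse`` or ``radosgw`` setup
might redirect both paths to a user-writable location in ``ceph.conf``::

    [client]
        # illustrative only: point both paths somewhere the non-root user can write
        admin socket = /home/cephuser/run/$cluster-$name.asok
        log file = /home/cephuser/log/$cluster-$name.log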
diff --git a/ceph.spec.in b/ceph.spec.in
index a60d87ad814..bcb1214cc93 100644
--- a/ceph.spec.in
+++ b/ceph.spec.in
@@ -239,14 +239,8 @@ License: LGPL-2.0
Requires: java
Requires: libcephfs_jni1 = %{version}-%{release}
BuildRequires: java-devel
-%if 0%{?suse_version} > 1220
Requires: junit4
BuildRequires: junit4
-%else
-Requires: junit
-BuildRequires: junit
-%endif
-BuildRequires: junit
%description -n cephfs-java
This package contains the Java libraries for the Ceph File System.
@@ -404,7 +398,6 @@ fi
%{_bindir}/ceph-osd
%{_bindir}/ceph-rbdnamer
%{_bindir}/ceph-dencoder
-%{_bindir}/ceph-rest-api
%{_bindir}/librados-config
%{_bindir}/rados
%{_bindir}/rbd
@@ -422,6 +415,7 @@ fi
/sbin/mount.ceph
%dir %{_libdir}/rados-classes
%{_libdir}/rados-classes/libcls_rbd.so*
+%{_libdir}/rados-classes/libcls_hello.so*
%{_libdir}/rados-classes/libcls_rgw.so*
%{_libdir}/rados-classes/libcls_lock.so*
%{_libdir}/rados-classes/libcls_kvs.so*
diff --git a/doc/index.rst b/doc/index.rst
index 8bf5340b2f6..4068be599e5 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -90,6 +90,7 @@ about Ceph, see our `Architecture`_ section.
:maxdepth: 1
:hidden:
+ start/intro
start/index
install/index
rados/index
diff --git a/doc/install/index.rst b/doc/install/index.rst
index 347b6ae9ac2..3be09c5d0df 100644
--- a/doc/install/index.rst
+++ b/doc/install/index.rst
@@ -1,50 +1,54 @@
-==============
- Installation
-==============
-
-The Ceph Object Store is the foundation of all Ceph clusters, and it consists
-primarily of two types of daemons: Object Storage Daemons (OSDs) and monitors.
-The Ceph Object Store is based upon the concept of
-:abbr:`RADOS (Reliable Autonomic Distributed Object Store)`, which eliminates
-single points of failure and delivers infinite scalability. For details on
-the architecture of Ceph and RADOS, refer to `Ceph Architecture`_. All Ceph
-deployments have OSDs and monitors, so you should prepare your Ceph cluster
-by focusing first on the object storage cluster.
+=======================
+ Installation (Manual)
+=======================
.. raw:: html
- <table cellpadding="10"><colgroup><col width="33%"><col width="33%"><col width="33%"></colgroup><tbody valign="top"><tr><td><h3>Recommendations</h3>
-
-To begin using Ceph in production, you should review our hardware
-recommendations and operating system recommendations. Many of the
-frequently-asked questions in our mailing list involve hardware-related
-questions and how to install Ceph on various distributions.
+ <table><colgroup><col width="50%"><col width="50%"></colgroup><tbody valign="top"><tr><td><h3>Advanced Package Tool (APT)</h3>
+
+If you are deploying a Ceph cluster on Debian or Ubuntu distributions,
+use the instructions below to install packages manually.
.. toctree::
:maxdepth: 2
- Hardware Recommendations <hardware-recommendations>
- OS Recommendations <os-recommendations>
-
-.. raw:: html
+ Installing Debian/Ubuntu Packages <debian>
+ Installing on Calxeda Hardware <calxeda>
+ Installing QEMU <qemu-deb>
+ Installing libvirt <libvirt-deb>
- </td><td><h3>Installation</h3>
+.. raw:: html
-If you are deploying a Ceph cluster (that is, not developing Ceph),
-install Ceph using our stable release packages. For testing, you
-may install development release and testing packages.
+ </td><td><h3>Red Hat Package Manager (RPM) / Yellowdog Updater, Modified (YUM)</h3>
+
+If you are deploying a Ceph cluster on Red Hat (rhel6), CentOS (el6), Fedora
+17-19 (f17-f19), OpenSUSE 12 (opensuse12), and SLES (sles11) distributions, use
+the instructions below to install packages manually.
.. toctree::
:maxdepth: 2
- Installing Debian/Ubuntu Packages <debian>
Installing RPM Packages <rpm>
- Installing on Calxeda <calxeda>
+ Installing YUM Priorities <yum-priorities>
+ Installing QEMU <qemu-rpm>
+ Installing libvirt <libvirt-rpm>
+
+.. raw:: html
+
+ </td></tr><tr><td><h3>Upgrading Ceph</h3>
+
+If you are upgrading Ceph from a previous release, please read the upgrade
+documentation to ensure that you follow the proper upgrade sequence.
+
+.. toctree::
+ :maxdepth: 2
+
Upgrading Ceph <upgrading-ceph>
+
-.. raw:: html
+.. raw:: html
- </td><td><h3>Building Ceph from Source</h3>
+ </td><td><h3>Building Ceph</h3>
You can build Ceph from source by downloading a release or cloning the ``ceph``
repository at github. If you intend to build Ceph from source, please see the
@@ -63,9 +67,10 @@ will save you time.
Build a Package <build-packages>
Contributing Code <contributing>
+See the `Development`_ section for additional development details.
.. raw:: html
</td></tr></tbody></table>
-
-.. _Ceph Architecture: ../architecture/
+
+.. _Development: ../../dev \ No newline at end of file
diff --git a/doc/install/libvirt-deb.rst b/doc/install/libvirt-deb.rst
new file mode 100644
index 00000000000..9365e46c747
--- /dev/null
+++ b/doc/install/libvirt-deb.rst
@@ -0,0 +1,43 @@
+====================
+ Installing libvirt
+====================
+
+
+Prerequisites
+=============
+
+- `Install`_ and `configure`_ a Ceph Storage Cluster
+- `Install and configure`_ QEMU/KVM
+
+
+Installing ``libvirt`` on Ubuntu 12.04 Precise
+==============================================
+
+``libvirt`` packages are incorporated into the Ubuntu 12.04 precise
+distribution. To install ``libvirt`` on precise, execute the following::
+
+ sudo apt-get update && sudo apt-get install libvirt-bin
+
+
+Installing ``libvirt`` on Earlier Versions of Ubuntu
+====================================================
+
+For Ubuntu distributions 11.10 oneiric and earlier, you must build ``libvirt``
+from source. Clone the ``libvirt`` repository, and use `AutoGen`_ to generate
+the build. Then, execute ``make`` and ``make install`` to complete the
+installation. For example::
+
+ git clone git://libvirt.org/libvirt.git
+ cd libvirt
+ ./autogen.sh
+ make
+ sudo make install
+
+See `libvirt Installation`_ for details.
+
+
+.. _libvirt Installation: http://www.libvirt.org/compiling.html
+.. _AutoGen: http://www.gnu.org/software/autogen/
+.. _Install: ../index
+.. _configure: ../../rados/configuration
+.. _Install and configure: ../../rbd/qemu-rbd
diff --git a/doc/install/libvirt-rpm.rst b/doc/install/libvirt-rpm.rst
new file mode 100644
index 00000000000..a94c6e8ae12
--- /dev/null
+++ b/doc/install/libvirt-rpm.rst
@@ -0,0 +1,19 @@
+====================
+ Installing libvirt
+====================
+
+To use ``libvirt`` with a Ceph Storage Cluster, you must
+have a running Ceph Storage Cluster. You must also install QEMU.
+See `Installing QEMU`_ for details.
+
+
+``libvirt`` packages are incorporated into recent CentOS/RHEL distributions.
+To install ``libvirt``, execute the following::
+
+ sudo yum install libvirt
+
+See `libvirt Installation`_ for details.
+
+
+.. _libvirt Installation: http://www.libvirt.org/compiling.html
+.. _Installing QEMU: ../qemu-rpm \ No newline at end of file
diff --git a/doc/install/qemu-deb.rst b/doc/install/qemu-deb.rst
new file mode 100644
index 00000000000..29abeafa3bc
--- /dev/null
+++ b/doc/install/qemu-deb.rst
@@ -0,0 +1,26 @@
+=================
+ Installing QEMU
+=================
+
+
+
+Installing QEMU (12.04 Precise and later)
+=========================================
+
+QEMU packages are incorporated into Ubuntu 12.04 Precise Pangolin and later
+versions. To install QEMU, execute the following::
+
+ sudo apt-get install qemu
+
+Installing QEMU (11.10 Oneiric and earlier)
+===========================================
+
+For Ubuntu distributions 11.10 Oneiric and earlier, you must install
+the 0.15 version of QEMU or later. To build QEMU from source, use the
+following procedure::
+
+ cd {your-development-directory}
+ git clone git://git.qemu.org/qemu.git
+ cd qemu
+ ./configure --enable-rbd
+ make; make install
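As a quick sanity check (an assumption on our part, not part of the upstream
procedure), you can confirm that the freshly built binaries include ``rbd``
support by grepping the formats ``qemu-img`` reports::

    qemu-img --help | grep rbd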
diff --git a/doc/install/qemu-rpm.rst b/doc/install/qemu-rpm.rst
new file mode 100644
index 00000000000..67da2c3714c
--- /dev/null
+++ b/doc/install/qemu-rpm.rst
@@ -0,0 +1,56 @@
+=================
+ Installing QEMU
+=================
+
+To install QEMU with ``yum``, you must ensure that you have
+``yum-plugin-priorities`` installed. See `Installing YUM Priorities`_
+for details.
+
+To install QEMU, execute the following:
+
+#. Create a ``/etc/yum.repos.d/ceph-qemu.conf`` file with the following
+ contents::
+
+ [ceph-qemu]
+ name=Ceph Packages for QEMU
+ baseurl=http://ceph.com/packages/ceph-extras/rpm/centos6.3/$basearch
+ enabled=1
+ priority=2
+ gpgcheck=1
+ type=rpm-md
+ gpgkey=https://ceph.com/git/?p=ceph.git;a=blob_plain;f=keys/release.asc
+
+ [ceph-qemu-noarch]
+ name=Ceph QEMU noarch
+ baseurl=http://ceph.com/packages/ceph-extras/rpm/centos6.3/noarch
+ enabled=1
+ priority=2
+ gpgcheck=1
+ type=rpm-md
+ gpgkey=https://ceph.com/git/?p=ceph.git;a=blob_plain;f=keys/release.asc
+
+ [ceph-qemu-source]
+ name=Ceph QEMU Sources
+ baseurl=http://ceph.com/packages/ceph-extras/rpm/centos6.3/SRPMS
+ enabled=1
+ priority=2
+ gpgcheck=1
+ type=rpm-md
+ gpgkey=https://ceph.com/git/?p=ceph.git;a=blob_plain;f=keys/release.asc
+
+#. Update your repositories. ::
+
+ sudo yum update
+
+#. Install QEMU for Ceph. ::
+
+ sudo yum install qemu-kvm qemu-kvm-tools qemu-img
+
+#. Install additional QEMU packages (optional)::
+
+ sudo yum install qemu-guest-agent qemu-guest-agent-win32
+
+See `QEMU and Block Devices`_ for usage.
+
+.. _QEMU and Block Devices: ../../rbd/qemu-rbd
+.. _Installing YUM Priorities: ../yum-priorities \ No newline at end of file
diff --git a/doc/install/rpm.rst b/doc/install/rpm.rst
index ea96d394c7a..9e8cdcd003c 100644
--- a/doc/install/rpm.rst
+++ b/doc/install/rpm.rst
@@ -7,6 +7,7 @@ development release packages (for the latest features), or development
testing packages (for development and QA only). Do not add multiple
package sources at the same time.
+
Install Release Key
===================
@@ -139,142 +140,54 @@ You can download the RPMs directly from::
-Installing Ceph Deploy
-======================
-
-Once you have added either release or development packages to ``yum``, you
-can install ``ceph-deploy``. ::
-
- sudo yum install ceph-deploy python-pushy
-
-
-
-Installing Ceph Packages
-========================
-
-Once you have added either release or development packages to ``yum``, you
-can install Ceph packages. You can also use ``ceph-deploy`` to install Ceph
-packages. ::
-
- sudo yum install ceph
-
-
-
-Installing Ceph Object Storage
-==============================
-
-:term:`Ceph Object Storage` runs on Apache and FastCGI in conjunction with the
-:term:`Ceph Storage Cluster`.
-
-#. Install Apache and FastCGI. ::
-
- rpm -ivh fcgi-2.4.0-10.el6.x86_64.rpm
- rpm -ivh mod_fastcgi-2.4.6-2.el6.rf.x86_64.rpm
-
-
-#. Install the Ceph Object Storage daemon. ::
+Adding Ceph to YUM
+==================
- yum install ceph-radosgw
+You may also add Ceph to the ``/etc/yum.repos.d`` directory. Create a
+``ceph.repo`` file. In the example below, replace ``{ceph-stable}`` with
+a stable release of Ceph (e.g., ``cuttlefish``, ``dumpling``, etc.) and
+``{distro}`` with your Linux distribution (e.g., ``el6``, ``rhel6``, etc.). ::
+ [ceph]
+ name=Ceph packages for $basearch
+ baseurl=http://ceph.com/rpm-{ceph-stable}/{distro}/$basearch
+ enabled=1
+ gpgcheck=1
+ type=rpm-md
+ gpgkey=https://ceph.com/git/?p=ceph.git;a=blob_plain;f=keys/release.asc
-#. Add the following lines to your Ceph configuration file.
+ [ceph-noarch]
+ name=Ceph noarch packages
+ baseurl=http://ceph.com/rpm-{ceph-stable}/{distro}/noarch
+ enabled=1
+ gpgcheck=1
+ type=rpm-md
+ gpgkey=https://ceph.com/git/?p=ceph.git;a=blob_plain;f=keys/release.asc
-.. code-block:: ini
+ [ceph-source]
+ name=Ceph source packages
+ baseurl=http://ceph.com/rpm-{ceph-stable}/{distro}/SRPMS
+ enabled=0
+ gpgcheck=1
+ type=rpm-md
+ gpgkey=https://ceph.com/git/?p=ceph.git;a=blob_plain;f=keys/release.asc
- [client.radosgw.gateway]
- host = {fqdn}
- keyring = /etc/ceph/keyring.radosgw.gateway
- rgw socket path = /tmp/radosgw.sock
- log file = /var/log/ceph/radosgw.log
- rgw print continue = false
-
-.. note:: Replace ``{fqdn}`` with the output from ``hostname``. This is
- important. Debian systems use the simple hostname, but on CentOS 6/RHEL 6
- you must use the fully qualified domain name.
-
-#. Create a data directory. ::
-
- mkdir -p /var/lib/ceph/radosgw/ceph-radosgw.gateway
-
-
-#. Change ``httpd ServerName`` in ``/etc/httpd/conf/httpd.conf``. ::
-
- ServerName {FQDN}
-
-
-#. Create an Apache httpd virtual host in ``/etc/httpd/conf.d/rgw.conf``. ::
-
- FastCgiExternalServer /var/www/s3gw.fcgi -socket /tmp/radosgw.sock
- <VirtualHost *:80>
- ServerName <FQDN of the host>
- ServerAdmin root@localhost
- DocumentRoot /var/www
- RewriteEngine On
- RewriteRule ^/([a-zA-Z0-9-_.]*)([/]?.*) /s3gw.fcgi?page=$1&params=$2&%{QUERY_STRING} [E=HTTP_AUTHORIZATION:%{HTTP:Authorization},L]
- <IfModule mod_fastcgi.c>
- <Directory /var/www>
- Options +ExecCGI
- AllowOverride All
- SetHandler fastcgi-script
- Order allow,deny
- Allow from all
- AuthBasicAuthoritative Off
- </Directory>
- </IfModule>
- AllowEncodedSlashes On
- ErrorLog /var/log/httpd/error.log
- CustomLog /var/log/httpd/access.log combined
- ServerSignature Off
- </VirtualHost>
-
-#. Turn off ``fastcgiwrapper`` in ``/etc/httpd/conf.d/fastcgi.conf`` by
- commenting out the following line::
-
- #FastCgiWrapper On
-
-
-#. Add a ``fastcgi`` script with the following path ``/var/www/s3gw.fcgi``. ::
-
- #!/bin/sh
- exec /usr/bin/radosgw -c /etc/ceph/ceph.conf -n client.radosgw.gateway
-
-
-#. Make ``s3gw.fcgi`` executable::
-
- chmod +x /var/www/s3gw.fcgi
-
-
-#. Create a user key. ::
-
- ceph-authtool -C -n client.radosgw.gateway --gen-key /etc/ceph/keyring.radosgw.gateway
- ceph-authtool -n client.radosgw.gateway --cap mon 'allow rw' --cap osd 'allow rwx' /etc/ceph/keyring.radosgw.gateway
- ceph auth add client.radosgw.gateway --in-file=/etc/ceph/keyring.radosgw.gateway
-
-
-#. Please make sure ``/etc/ceph/keyring.radosgw.gateway`` file and
- ``/var/log/ceph/radosgw.log`` are accessible by the ``apache`` user. ::
-
- sudo chown apache:apache /etc/ceph/keyring.radosgw.gateway
- sudo chown apache:apache /var/log/ceph/radosgw.log
-
-.. note:: This is important. The user is ``root`` for Debian.
+Installing Ceph Deploy
+======================
-#. Create ``.rgw.buckets`` and add it to the Ceph Object Storage daemon. ::
+Once you have added either release or development packages, or added a
+``ceph.repo`` file to ``/etc/yum.repos.d``, you can install ``ceph-deploy``. ::
- rados mkpool .rgw.buckets
- radosgw-admin pool add --pool .rgw.buckets
+ sudo yum install ceph-deploy python-pushy
-#. Configure Apache and the Ceph Object Storage daemon to start on boot. ::
- chkconfig httpd on
- chkconfig ceph-radosgw on
+Installing Ceph Packages
+========================
-#. Start the services. ::
+Once you have added either release or development packages, or added a
+``ceph.repo`` file to ``/etc/yum.repos.d``, you can install Ceph packages. ::
- /etc/init.d/httpd start
- /etc/init.d/ceph-radosgw start
-
-See `Ceph Object Storage`_ for additional details.
+ sudo yum install ceph
-.. _Ceph Object Storage: ../../radosgw
+.. note:: You can also use ``ceph-deploy`` to install Ceph packages.
diff --git a/doc/install/yum-priorities.rst b/doc/install/yum-priorities.rst
new file mode 100644
index 00000000000..e4adb72b7dd
--- /dev/null
+++ b/doc/install/yum-priorities.rst
@@ -0,0 +1,20 @@
+===========================
+ Installing YUM Priorities
+===========================
+
+Ceph builds packages for Apache and FastCGI (for 100-continue support) and
+QEMU (for ``rbd`` support). You must set priorities in your ``.repo``
+files to ensure that ``yum`` installs the Ceph packages instead of the
+standard packages. The ``priorities`` setting requires you to install
+and enable ``yum-plugin-priorities``.
+
+#. Install ``yum-plugin-priorities``. ::
+
+ sudo yum install yum-plugin-priorities
+
+#. Ensure ``/etc/yum/pluginconf.d/priorities.conf`` exists.
+
+#. Ensure ``priorities.conf`` enables the plugin. ::
+
+ [main]
+ enabled = 1
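As an illustration (the repository id below is hypothetical; the ``baseurl`` is
the Ceph extras repository used elsewhere in these instructions), a pinned
``.repo`` entry carries a ``priority`` line like this::

    [ceph-extras-example]
    name=Example repository pinned ahead of the stock packages
    baseurl=http://ceph.com/packages/ceph-extras/rpm/centos6.3/$basearch
    enabled=1
    priority=2
    gpgcheck=1
    type=rpm-md
    gpgkey=https://ceph.com/git/?p=ceph.git;a=blob_plain;f=keys/release.asc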
diff --git a/doc/rados/operations/add-or-rm-mons.rst b/doc/rados/operations/add-or-rm-mons.rst
index 17ae9d86b85..e3bac1fca09 100644
--- a/doc/rados/operations/add-or-rm-mons.rst
+++ b/doc/rados/operations/add-or-rm-mons.rst
@@ -32,7 +32,7 @@ version of Linux installed (typically Ubuntu 12.04 precise).
Add your monitor host to a rack in your cluster, connect it to the network
and ensure that it has network connectivity.
-.. _Hardware Recommendations: ../../install/hardware-recommendations
+.. _Hardware Recommendations: ../../../start/hardware-recommendations
Install the Required Software
-----------------------------
@@ -42,17 +42,9 @@ manually. See `Installing Debian/Ubuntu Packages`_ for details.
You should configure SSH to a user with password-less authentication
and root permissions.
-.. _Installing Debian/Ubuntu Packages: ../../install/debian
+.. _Installing Debian/Ubuntu Packages: ../../../install/debian
-For clusters deployed with Chef, create a `chef user`_, `configure
-SSH keys`_, `install Ruby`_ and `install the Chef client`_ on your host. See
-`Installing Chef`_ for details.
-.. _chef user: ../../install/chef#createuser
-.. _configure SSH keys: ../../install/chef#genkeys
-.. _install the Chef client: ../../install/chef#installchef
-.. _Installing Chef: ../../install/chef
-.. _install Ruby: ../../install/chef#installruby
.. _Adding a Monitor (Manual):
diff --git a/doc/rados/operations/authentication.rst b/doc/rados/operations/authentication.rst
index 6bacf4c7dff..d9995da8fb8 100644
--- a/doc/rados/operations/authentication.rst
+++ b/doc/rados/operations/authentication.rst
@@ -154,6 +154,7 @@ during setup and/or troubleshooting to temporarily disable authentication.
auth cluster required = none
auth service required = none
auth client required = none
+ auth supported = none
#. Or, disable ``cephx`` authentication for versions ``0.50`` and below
(deprecated as of version 0.51) by setting the following option in the
diff --git a/doc/rados/operations/operating.rst b/doc/rados/operations/operating.rst
index 9942ea3cabf..8c62ed5cdbf 100644
--- a/doc/rados/operations/operating.rst
+++ b/doc/rados/operations/operating.rst
@@ -7,11 +7,10 @@
Running Ceph with Upstart
=========================
-When deploying Ceph Cuttlefish and beyond with ``ceph-deploy``, you may start
-and stop Ceph daemons on a :term:`Ceph Node` using the event-based `Upstart`_.
-Upstart does not require you to define daemon instances in the Ceph configuration
-file (although, they are still required for ``sysvinit`` should you choose to
-use it).
+When deploying Ceph Cuttlefish and beyond with ``ceph-deploy`` on Debian/Ubuntu
+distributions, you may start and stop Ceph daemons on a :term:`Ceph Node` using
+the event-based `Upstart`_. Upstart does not require you to define daemon
+instances in the Ceph configuration file.
To list the Ceph Upstart jobs and instances on a node, execute::
@@ -19,6 +18,7 @@ To list the Ceph Upstart jobs and instances on a node, execute::
See `initctl`_ for additional details.
+
Starting all Daemons
--------------------
@@ -93,29 +93,20 @@ For example::
sudo start ceph-mds id=ceph-server
-
.. index:: Ceph service; sysvinit; operating a cluster
-Running Ceph as a Service
-=========================
+Running Ceph
+============
-When you deploy Ceph Argonaut or Bobtail with ``mkcephfs``, use the
-service or traditional sysvinit.
+Each time you **start**, **restart**, or **stop** Ceph daemons (or your entire
+cluster), you must specify at least one option and one command. You may
+also specify a daemon type or a daemon instance. ::
-The ``ceph`` service provides functionality to **start**, **restart**, and
-**stop** your Ceph cluster. Each time you execute ``ceph`` processes, you
-must specify at least one option and one command. You may also specify a daemon
-type or a daemon instance. For most newer Debian/Ubuntu distributions, you may
-use the following syntax::
+ {commandline} [options] [commands] [daemons]
- sudo service ceph [options] [commands] [daemons]
-For older distributions, you may wish to use the ``/etc/init.d/ceph`` path::
-
- sudo /etc/init.d/ceph [options] [commands] [daemons]
-
-The ``ceph`` service options include:
+The ``ceph`` options include:
+-----------------+----------+-------------------------------------------------+
| Option | Shortcut | Description |
@@ -134,7 +125,7 @@ The ``ceph`` service options include:
| ``--conf`` | ``-c`` | Use an alternate configuration file. |
+-----------------+----------+-------------------------------------------------+
-The ``ceph`` service commands include:
+The ``ceph`` commands include:
+------------------+------------------------------------------------------------+
| Command | Description |
@@ -152,83 +143,213 @@ The ``ceph`` service commands include:
| ``cleanalllogs`` | Cleans out **everything** in the log directory. |
+------------------+------------------------------------------------------------+
-For subsystem operations, the ``ceph`` service can target specific daemon types by
-adding a particular daemon type for the ``[daemons]`` option. Daemon types include:
+For subsystem operations, the ``ceph`` service can target specific daemon types
+by adding a particular daemon type for the ``[daemons]`` option. Daemon types
+include:
- ``mon``
- ``osd``
- ``mds``
-The ``ceph`` service's ``[daemons]`` setting may also target a specific instance.
-To start a Ceph daemon on the local :term:`Ceph Node`, use the following syntax::
- sudo /etc/init.d/ceph start osd.0
+Running Ceph with sysvinit
+--------------------------
-To start a Ceph daemon on another node, use the following syntax::
-
- sudo /etc/init.d/ceph -a start osd.0
+Using traditional ``sysvinit`` is the recommended way to run Ceph with CentOS,
+Red Hat, Fedora, and SLES distributions. You may also use it for older
+distributions of Debian/Ubuntu.
-Where ``osd.0`` is the first OSD in the cluster.
-
-Starting a Cluster
-------------------
+Starting all Daemons
+~~~~~~~~~~~~~~~~~~~~
To start your Ceph cluster, execute ``ceph`` with the ``start`` command.
-The usage may differ based upon your Linux distribution. For example, for most
-newer Debian/Ubuntu distributions, you may use the following syntax::
-
- sudo service ceph [options] [start|restart] [daemonType|daemonID]
-
-For older distributions, you may wish to use the ``/etc/init.d/ceph`` path::
+Use the following syntax::
sudo /etc/init.d/ceph [options] [start|restart] [daemonType|daemonID]
The following examples illustrates a typical use case::
- sudo service ceph -a start
sudo /etc/init.d/ceph -a start
Once you execute with ``-a`` (i.e., execute on all nodes), Ceph should begin
-operating. You may also specify a particular daemon instance to constrain the
-command to a single instance. To start a Ceph daemon on the local Ceph Node,
-use the following syntax::
+operating.
+
+
+Stopping all Daemons
+~~~~~~~~~~~~~~~~~~~~
+
+To stop your Ceph cluster, execute ``ceph`` with the ``stop`` command.
+Use the following syntax::
+
+ sudo /etc/init.d/ceph [options] stop [daemonType|daemonID]
+
+The following example illustrates a typical use case::
+
+ sudo /etc/init.d/ceph -a stop
+Once you execute with ``-a`` (i.e., execute on all nodes), Ceph should stop
+operating.
+
+
+Starting all Daemons by Type
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+To start all Ceph daemons of a particular type on the local Ceph Node, use the
+following syntax::
+
+ sudo /etc/init.d/ceph start {daemon-type}
+ sudo /etc/init.d/ceph start osd
+
+To start all Ceph daemons of a particular type on another node, use the
+following syntax::
+
+ sudo /etc/init.d/ceph -a start {daemon-type}
+ sudo /etc/init.d/ceph -a start osd
+
+
+Stopping all Daemons by Type
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+To stop all Ceph daemons of a particular type on the local Ceph Node, use the
+following syntax::
+
+ sudo /etc/init.d/ceph stop {daemon-type}
+ sudo /etc/init.d/ceph stop osd
+
+To stop all Ceph daemons of a particular type on another node, use the
+following syntax::
+
+ sudo /etc/init.d/ceph -a stop {daemon-type}
+ sudo /etc/init.d/ceph -a stop osd
+
+
+Starting a Daemon
+~~~~~~~~~~~~~~~~~
+
+To start a Ceph daemon on the local Ceph Node, use the following syntax::
+
+ sudo /etc/init.d/ceph start {daemon-type}.{instance}
sudo /etc/init.d/ceph start osd.0
To start a Ceph daemon on another node, use the following syntax::
+ sudo /etc/init.d/ceph -a start {daemon-type}.{instance}
sudo /etc/init.d/ceph -a start osd.0
-Stopping a Cluster
-------------------
+Stopping a Daemon
+~~~~~~~~~~~~~~~~~
+
+To stop a Ceph daemon on the local Ceph Node, use the following syntax::
+
+ sudo /etc/init.d/ceph stop {daemon-type}.{instance}
+ sudo /etc/init.d/ceph stop osd.0
+
+To stop a Ceph daemon on another node, use the following syntax::
+
+ sudo /etc/init.d/ceph -a stop {daemon-type}.{instance}
+ sudo /etc/init.d/ceph -a stop osd.0
+
+
+Running Ceph as a Service
+-------------------------
+
+When you deploy Ceph Argonaut or Bobtail with ``mkcephfs``, you operate
+Ceph as a service (you may also use sysvinit).
+
+
+Starting all Daemons
+~~~~~~~~~~~~~~~~~~~~
+
+To start your Ceph cluster, execute ``ceph`` with the ``start`` command.
+Use the following syntax::
+
+ sudo service ceph [options] [start|restart] [daemonType|daemonID]
+
+The following example illustrates a typical use case::
+
+ sudo service ceph -a start
+
+Once you execute with ``-a`` (i.e., execute on all nodes), Ceph should begin
+operating.
+
+
+Stopping all Daemons
+~~~~~~~~~~~~~~~~~~~~
To stop your Ceph cluster, execute ``ceph`` with the ``stop`` command.
-The usage may differ based upon your Linux distribution. For example, for most
-newer Debian/Ubuntu distributions, you may use the following syntax::
+Use the following syntax::
sudo service ceph [options] stop [daemonType|daemonID]
For example::
- sudo service ceph -a stop
-
-For older distributions, you may wish to use the ``/etc/init.d/ceph`` path::
-
- sudo /etc/init.d/ceph -a stop
+ sudo service ceph -a stop
Once you execute with ``-a`` (i.e., execute on all nodes), Ceph should shut
-down. You may also specify a particular daemon instance to constrain the
-command to a single instance. To stop a Ceph daemon on the local Ceph Node,
-use the following syntax::
+down.
+
+
+Starting all Daemons by Type
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+To start all Ceph daemons of a particular type on the local Ceph Node, use the
+following syntax::
+
+ sudo service ceph start {daemon-type}
+ sudo service ceph start osd
+
+To start all Ceph daemons of a particular type on all nodes, use the following
+syntax::
+
+ sudo service ceph -a start {daemon-type}
+ sudo service ceph -a start osd
+
+
+Stopping all Daemons by Type
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+To stop all Ceph daemons of a particular type on the local Ceph Node, use the
+following syntax::
+
+ sudo service ceph stop {daemon-type}
+ sudo service ceph stop osd
+
+To stop all Ceph daemons of a particular type on all nodes, use the following
+syntax::
+
+ sudo service ceph -a stop {daemon-type}
+ sudo service ceph -a stop osd
- sudo /etc/init.d/ceph stop osd.0
+
+Starting a Daemon
+~~~~~~~~~~~~~~~~~
+
+To start a Ceph daemon on the local Ceph Node, use the following syntax::
+
+ sudo service ceph start {daemon-type}.{instance}
+ sudo service ceph start osd.0
+
+To start a Ceph daemon on another node, use the following syntax::
+
+ sudo service ceph -a start {daemon-type}.{instance}
+ sudo service ceph -a start osd.0
+
+
+Stopping a Daemon
+~~~~~~~~~~~~~~~~~
+
+To stop a Ceph daemon on the local Ceph Node, use the following syntax::
+
+ sudo service ceph stop {daemon-type}.{instance}
+ sudo service ceph stop osd.0
To stop a Ceph daemon on another node, use the following syntax::
- sudo /etc/init.d/ceph -a stop osd.0
+ sudo service ceph -a stop {daemon-type}.{instance}
+ sudo service ceph -a stop osd.0
diff --git a/doc/radosgw/config.rst b/doc/radosgw/config.rst
index 684a50649ec..caa3dac15e1 100644
--- a/doc/radosgw/config.rst
+++ b/doc/radosgw/config.rst
@@ -387,6 +387,7 @@ The following configuration options are available for Keystone integration::
rgw keystone accepted roles = {accepted user roles}
rgw keystone token cache size = {number of tokens to cache}
rgw keystone revocation interval = {number of seconds before checking revoked tickets}
+ rgw s3 auth use keystone = true
nss db path = {path to nss db}
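For instance, a filled-in version of the options shown above (the values are
illustrative only, not recommendations) might read::

    rgw keystone accepted roles = Member, admin
    rgw keystone token cache size = 500
    rgw keystone revocation interval = 600
    rgw s3 auth use keystone = true
    nss db path = /var/ceph/nss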
A Ceph Object Gateway user is mapped into a Keystone ``tenant``. A Keystone user
diff --git a/doc/rbd/libvirt.rst b/doc/rbd/libvirt.rst
index cc8dc9bd189..4813c3258d0 100644
--- a/doc/rbd/libvirt.rst
+++ b/doc/rbd/libvirt.rst
@@ -40,46 +40,11 @@ The most common ``libvirt`` use case involves providing Ceph block devices to
cloud solutions like OpenStack or CloudStack. The cloud solution uses
``libvirt`` to interact with QEMU/KVM, and QEMU/KVM interacts with Ceph block
devices via ``librbd``. See `Block Devices and OpenStack`_ and `Block Devices
-and CloudStack`_ for details.
+and CloudStack`_ for details. See `Installation`_ for installation details.
You can also use Ceph block devices with ``libvirt``, ``virsh`` and the
``libvirt`` API. See `libvirt Virtualization API`_ for details.
-Prerequisites
-=============
-
-- `Install`_ and `configure`_ a Ceph cluster
-- `Install and configure`_ QEMU/KVM
-
-
-Installing ``libvirt`` on Ubuntu 12.04 Precise
-==============================================
-
-``libvirt`` packages are incorporated into the Ubuntu 12.04 precise
-distribution. To install ``libvirt`` on precise, execute the following::
-
- sudo apt-get update && sudo apt-get install libvirt-bin
-
-
-Installing ``libvirt`` on Earlier Versions of Ubuntu
-====================================================
-
-For Ubuntu distributions 11.10 oneiric and earlier, you must build ``libvirt``
-from source. Clone the ``libvirt`` repository, and use `AutoGen`_ to generate
-the build. Then, execute ``make`` and ``make install`` to complete the
-installation. For example::
-
- git clone git://libvirt.org/libvirt.git
- cd libvirt
- ./autogen.sh
- make
- sudo make install
-
-See `libvirt Installation`_ for details.
-
-
-Using Ceph with Virtual Machines
-================================
To create VMs that use Ceph block devices, use the procedures in the following
sections. In the exemplary embodiment, we've used ``libvirt-pool`` for the pool
@@ -89,7 +54,7 @@ when executing commands in the subsequent procedures.
Configuring Ceph
-----------------
+================
To configure Ceph for use with ``libvirt``, perform the following steps:
@@ -132,7 +97,7 @@ To configure Ceph for use with ``libvirt``, perform the following steps:
Preparing the VM Manager
-------------------------
+========================
You may use ``libvirt`` without a VM manager, but you may find it simpler to
create your first domain with ``virt-manager``.
@@ -150,7 +115,7 @@ create your first domain with ``virt-manager``.
Creating a VM
--------------
+=============
To create a VM with ``virt-manager``, perform the following steps:
@@ -182,7 +147,7 @@ To create a VM with ``virt-manager``, perform the following steps:
Configuring the VM
-------------------
+==================
When configuring the VM for use with Ceph, it is important to use ``virsh``
where appropriate. Additionally, ``virsh`` commands often require root
@@ -290,7 +255,7 @@ commands, refer to `Virsh Command Reference`_.
Summary
--------
+=======
Once you have configured the VM for use with Ceph, you can start the VM.
To verify that the VM and Ceph are communicating, you may perform the
@@ -320,13 +285,8 @@ If everything looks okay, you may begin using the Ceph block device
within your VM.
-
-.. _AutoGen: http://www.gnu.org/software/autogen/
-.. _libvirt Installation: http://www.libvirt.org/compiling.html
+.. _Installation: ../../install
.. _libvirt Virtualization API: http://www.libvirt.org
-.. _Install: ../../install
-.. _configure: ../../rados/configuration
-.. _Install and configure: ../qemu-rbd
.. _Block Devices and OpenStack: ../rbd-openstack
.. _Block Devices and CloudStack: ../rbd-cloudstack
.. _Create a pool: ../../rados/operations/pools#create-a-pool
diff --git a/doc/rbd/qemu-rbd.rst b/doc/rbd/qemu-rbd.rst
index 9d366f3ea8d..e0b55dee257 100644
--- a/doc/rbd/qemu-rbd.rst
+++ b/doc/rbd/qemu-rbd.rst
@@ -27,33 +27,12 @@ image each time it spins up a new virtual machine.
Ceph Block Devices can integrate with the QEMU virtual machine. For details on
QEMU, see `QEMU Open Source Processor Emulator`_. For QEMU documentation, see
-`QEMU Manual`_.
+`QEMU Manual`_. For installation details, see `Installation`_.
.. important:: To use Ceph Block Devices with QEMU, you must have access to a
running Ceph cluster.
-Installing QEMU (12.04 Precise and later)
-=========================================
-
-QEMU packages are incorporated into Ubuntu 12.04 Precise Pangolin and later
-versions. To install QEMU, execute the following::
-
- sudo apt-get install qemu
-
-Installing QEMU (11.10 Oneric and earlier)
-==========================================
-
-For Ubuntu distributions 11.10 Oneiric and earlier, you must install
-the 0.15 version of QEMU or later. To build QEMU from source, use the
-following procedure::
-
- cd {your-development-directory}
- git clone git://git.qemu.org/qemu.git
- cd qemu
- ./configure --enable-rbd
- make; make install
-
Creating Images with QEMU
=========================
@@ -199,4 +178,5 @@ QEMU command line settings override the Ceph configuration file settings.
.. _QEMU Open Source Processor Emulator: http://wiki.qemu.org/Main_Page
.. _QEMU Manual: http://wiki.qemu.org/Manual
.. _RBD Cache: ../rbd-config-ref/
-.. _Snapshots: ../rbd-snapshot/ \ No newline at end of file
+.. _Snapshots: ../rbd-snapshot/
+.. _Installation: ../../install \ No newline at end of file
diff --git a/doc/rbd/rbd-openstack.rst b/doc/rbd/rbd-openstack.rst
index 660757639aa..80dd43ce406 100644
--- a/doc/rbd/rbd-openstack.rst
+++ b/doc/rbd/rbd-openstack.rst
@@ -127,7 +127,7 @@ Hosts running ``nova-compute`` do not need the keyring. Instead, they
store the secret key in libvirt. Create a temporary copy of the secret
key on the hosts running ``nova-compute``::
- ssh {your-compute-host} client.volumes.key <`ceph auth get-key client.volumes`
+ ceph auth get-key client.volumes | ssh {your-compute-host} tee client.volumes.key
Then, on the compute hosts, add the secret key to libvirt and remove the
temporary copy of the key::
diff --git a/doc/install/hardware-recommendations.rst b/doc/start/hardware-recommendations.rst
index 90d29e5e7e2..90d29e5e7e2 100644
--- a/doc/install/hardware-recommendations.rst
+++ b/doc/start/hardware-recommendations.rst
diff --git a/doc/start/index.rst b/doc/start/index.rst
index 2fc03c0a284..6e9277746d9 100644
--- a/doc/start/index.rst
+++ b/doc/start/index.rst
@@ -1,34 +1,6 @@
-=================
- Getting Started
-=================
-
-Whether you want to provide :term:`Ceph Object Storage` and/or :term:`Ceph Block
-Device` services to :term:`Cloud Platforms`, deploy a :term:`Ceph Filesystem` or
-use Ceph for another purpose, all :term:`Ceph Storage Cluster` deployments begin
-with setting up each :term:`Ceph Node`, your network and the Ceph Storage
-Cluster. A Ceph Storage Cluster has three essential daemons:
-
-.. ditaa:: +---------------+ +---------------+ +---------------+
- | OSDs | | Monitor | | MDS |
- +---------------+ +---------------+ +---------------+
-
-- **OSDs**: A :term:`Ceph OSD Daemon` (OSD) stores data, handles data
- replication, recovery, backfilling, rebalancing, and provides some monitoring
- information to Ceph Monitors by checking other Ceph OSD Daemons for a
- heartbeat. A Ceph Storage Cluster requires at least two Ceph OSD Daemons to
- achieve an ``active + clean`` state.
-
-- **Monitors**: A :term:`Ceph Monitor` maintains maps of the cluster state,
- including the monitor map, the OSD map, the Placement Group (PG) map, and the
- CRUSH map. Ceph maintains a history (called an "epoch") of each state change
- in the Ceph Monitors, Ceph OSD Daemons, and PGs.
-
-- **MDSs**: A :term:`Ceph Metadata Server` (MDS) stores metadata on behalf of
- the :term:`Ceph Filesystem` (i.e., Ceph Block Devices and Ceph Object Storage
- do not use MDS). Ceph Metadata Servers make it feasible for POSIX file system
- users to execute basic commands like ``ls``, ``find``, etc. without placing
- an enormous burden on the Ceph Storage Cluster.
-
+======================
+ Installation (Quick)
+======================
.. raw:: html
@@ -37,18 +9,17 @@ Cluster. A Ceph Storage Cluster has three essential daemons:
A :term:`Ceph Client` and a :term:`Ceph Node` may require some basic
configuration work prior to deploying a Ceph Storage Cluster. You can also
-avail yourself of help from the Ceph community by getting involved.
+avail yourself of help by getting involved in the Ceph community.
.. toctree::
- Get Involved <get-involved>
Preflight <quick-start-preflight>
.. raw:: html
</td><td><h3>Step 2: Storage Cluster</h3>
-Once you've completed your preflight checklist, you should be able to begin
+Once you've completed your preflight checklist, you should be able to begin
deploying a Ceph Storage Cluster.
.. toctree::
diff --git a/doc/start/intro.rst b/doc/start/intro.rst
new file mode 100644
index 00000000000..704ff1e8cd5
--- /dev/null
+++ b/doc/start/intro.rst
@@ -0,0 +1,70 @@
+===============
+ Intro to Ceph
+===============
+
+Whether you want to provide :term:`Ceph Object Storage` and/or :term:`Ceph Block
+Device` services to :term:`Cloud Platforms`, deploy a :term:`Ceph Filesystem` or
+use Ceph for another purpose, all :term:`Ceph Storage Cluster` deployments begin
+with setting up each :term:`Ceph Node`, your network and the Ceph Storage
+Cluster. A Ceph Storage Cluster requires at least one Ceph Monitor and at least
+two Ceph OSD Daemons. The Ceph Metadata Server is essential when running Ceph
+Filesystem clients.
+
+.. ditaa:: +---------------+ +---------------+ +---------------+
+ | OSDs | | Monitor | | MDS |
+ +---------------+ +---------------+ +---------------+
+
+- **OSDs**: A :term:`Ceph OSD Daemon` (OSD) stores data, handles data
+ replication, recovery, backfilling, rebalancing, and provides some monitoring
+ information to Ceph Monitors by checking other Ceph OSD Daemons for a
+ heartbeat. A Ceph Storage Cluster requires at least two Ceph OSD Daemons to
+ achieve an ``active + clean`` state when the cluster makes two copies of your
+ data (Ceph makes 2 copies by default, but you can adjust it).
+
+- **Monitors**: A :term:`Ceph Monitor` maintains maps of the cluster state,
+ including the monitor map, the OSD map, the Placement Group (PG) map, and the
+ CRUSH map. Ceph maintains a history (called an "epoch") of each state change
+ in the Ceph Monitors, Ceph OSD Daemons, and PGs.
+
+- **MDSs**: A :term:`Ceph Metadata Server` (MDS) stores metadata on behalf of
+ the :term:`Ceph Filesystem` (i.e., Ceph Block Devices and Ceph Object Storage
+ do not use MDS). Ceph Metadata Servers make it feasible for POSIX file system
+ users to execute basic commands like ``ls``, ``find``, etc. without placing
+ an enormous burden on the Ceph Storage Cluster.
+
+Ceph stores a client's data as objects within storage pools. Using the CRUSH
+algorithm, Ceph calculates which placement group should contain the object,
+and further calculates which Ceph OSD Daemon should store the placement group.
+The CRUSH algorithm enables the Ceph Storage Cluster to scale, rebalance, and
+recover dynamically.
+
+
+.. raw:: html
+
+ <style type="text/css">div.body h3{margin:5px 0px 0px 0px;}</style>
+ <table cellpadding="10"><colgroup><col width="50%"><col width="50%"></colgroup><tbody valign="top"><tr><td><h3>Recommendations</h3>
+
+To begin using Ceph in production, you should review our hardware
+recommendations and operating system recommendations.
+
+.. toctree::
+ :maxdepth: 2
+
+ Hardware Recommendations <hardware-recommendations>
+ OS Recommendations <os-recommendations>
+
+
+.. raw:: html
+
+ </td><td><h3>Get Involved</h3>
+
+ You can avail yourself of help, or contribute documentation, source
+ code, or bug reports by getting involved in the Ceph community.
+
+.. toctree::
+
+ get-involved
+
+.. raw:: html
+
+ </td></tr></tbody></table>
diff --git a/doc/install/os-recommendations.rst b/doc/start/os-recommendations.rst
index 71a4d3a278b..d8b418fe1b0 100644
--- a/doc/install/os-recommendations.rst
+++ b/doc/start/os-recommendations.rst
@@ -36,6 +36,36 @@ platforms. Generally speaking, there is very little dependence on
specific distributions aside from the kernel and system initialization
package (i.e., sysvinit, upstart, systemd).
+
+Dumpling (0.67)
+---------------
+
++----------+----------+--------------------+--------------+---------+------------+
+| Distro | Release | Code Name | Kernel | Notes | Testing |
++==========+==========+====================+==============+=========+============+
+| Ubuntu | 12.04 | Precise Pangolin | linux-3.2.0 | 1, 2 | B, I, C |
++----------+----------+--------------------+--------------+---------+------------+
+| Ubuntu | 12.10 | Quantal Quetzal | linux-3.5.4 | 2 | B |
++----------+----------+--------------------+--------------+---------+------------+
+| Ubuntu | 13.04 | Raring Ringtail | linux-3.8.5 | | B |
++----------+----------+--------------------+--------------+---------+------------+
+| Debian | 6.0 | Squeeze | linux-2.6.32 | 1, 2, 3 | B |
++----------+----------+--------------------+--------------+---------+------------+
+| Debian | 7.0 | Wheezy | linux-3.2.0 | 1, 2 | B |
++----------+----------+--------------------+--------------+---------+------------+
+| CentOS | 6.3 | N/A | linux-2.6.32 | 1, 2 | B, I |
++----------+----------+--------------------+--------------+---------+------------+
+| RHEL | 6.3 | | linux-2.6.32 | 1, 2 | B, I |
++----------+----------+--------------------+--------------+---------+------------+
+| Fedora | 18.0 | Spherical Cow | linux-3.6.0 | | B |
++----------+----------+--------------------+--------------+---------+------------+
+| Fedora | 19.0 | Schrödinger's Cat | linux-3.10.0 | | B |
++----------+----------+--------------------+--------------+---------+------------+
+| OpenSuse | 12.2 | N/A | linux-3.4.0 | 2 | B |
++----------+----------+--------------------+--------------+---------+------------+
+
+
+
Cuttlefish (0.61)
-----------------
@@ -63,6 +93,7 @@ Cuttlefish (0.61)
| OpenSuse | 12.2 | N/A | linux-3.4.0 | 2 | B |
+----------+----------+--------------------+--------------+---------+------------+
+
Bobtail (0.56)
--------------
@@ -90,6 +121,7 @@ Bobtail (0.56)
| OpenSuse | 12.2 | N/A | linux-3.4.0 | 2 | B |
+----------+----------+--------------------+--------------+---------+------------+
+
Argonaut (0.48)
---------------
@@ -126,6 +158,7 @@ Notes
``ceph-osd`` daemons using ``XFS`` or ``ext4`` on the same host will
not perform as well as they could.
+
Testing
-------
diff --git a/doc/start/quick-ceph-deploy.rst b/doc/start/quick-ceph-deploy.rst
index 3c0ca1b0653..1fabd1b182f 100644
--- a/doc/start/quick-ceph-deploy.rst
+++ b/doc/start/quick-ceph-deploy.rst
@@ -3,26 +3,31 @@
=============================
If you haven't completed your `Preflight Checklist`_, do that first. This
-**Quick Start** sets up a two-node demo cluster so you can explore some of the
-:term:`Ceph Storage Cluster` functionality. This **Quick Start** will help you
-install a minimal Ceph Storage Cluster on a server node from your admin node
-using ``ceph-deploy``.
+**Quick Start** sets up a :term:`Ceph Storage Cluster` using ``ceph-deploy``
+on your admin node. Create a three Ceph Node cluster so you can
+explore Ceph functionality.
.. ditaa::
- /----------------\ /----------------\
- | Admin Node |<------->| Server Node |
- | cCCC | | cCCC |
- +----------------+ +----------------+
- | Ceph Commands | | ceph - mon |
- \----------------/ +----------------+
- | ceph - osd |
- +----------------+
- | ceph - mds |
- \----------------/
-
-
-For best results, create a directory on your admin node for maintaining the
-configuration of your cluster. ::
+ /------------------\ /----------------\
+ | Admin Node | | ceph–node1 |
+ | +-------->+ cCCC |
+ | ceph–deploy | | mon.ceph–node1 |
+ \---------+--------/ \----------------/
+ |
+ | /----------------\
+ | | ceph–node2 |
+ +----------------->+ cCCC |
+ | | osd.0 |
+ | \----------------/
+ |
+ | /----------------\
+ | | ceph–node3 |
+ +----------------->| cCCC |
+ | osd.1 |
+ \----------------/
+
+For best results, create a directory on your admin node for maintaining the
+configuration that ``ceph-deploy`` generates for your cluster. ::
mkdir my-cluster
cd my-cluster
@@ -31,228 +36,283 @@ configuration of your cluster. ::
current directory. Ensure you are in this directory when executing
``ceph-deploy``.
+As a first exercise, create a Ceph Storage Cluster with one Ceph Monitor and two
+Ceph OSD Daemons. Once the cluster reaches an ``active + clean`` state, expand it
+by adding a third Ceph OSD Daemon, a Metadata Server and two more Ceph Monitors.
+
+.. important:: Do not call ``ceph-deploy`` with ``sudo`` or run it as ``root``
+ if you are logged in as a different user, because it will not issue ``sudo``
+ commands needed on the remote host.
Create a Cluster
================
-To create your Ceph Storage Cluster, declare its initial monitors, generate a
-filesystem ID (``fsid``) and generate monitor keys by entering the following
-command on a commandline prompt::
+If at any point you run into trouble and you want to start over, execute
+the following::
- ceph-deploy new {mon-server-name}
- ceph-deploy new mon-ceph-node
+ ceph-deploy purgedata {ceph-node} [{ceph-node}]
+ ceph-deploy forgetkeys
-Check the output of ``ceph-deploy`` with ``ls`` and ``cat`` in the current
-directory. You should see a Ceph configuration file, a keyring, and a log file
-for the new cluster. See `ceph-deploy new -h`_ for additional details.
-.. topic:: Single Node Quick Start
+On your admin node, perform the following steps using ``ceph-deploy``.
- Assuming only one node for your Ceph Storage Cluster, you will need to
- modify the default ``osd crush chooseleaf type`` setting (it defaults to
- ``1`` for ``node``) to ``0`` for ``device`` so that it will peer with OSDs
- on the local node. Add the following line to your Ceph configuration file::
-
- osd crush chooseleaf type = 0
+#. Create the cluster. ::
-.. tip:: If you deploy without executing foregoing step on a single node
- cluster, your Ceph Storage Cluster will not achieve an ``active + clean``
- state. To remedy this situation, you must modify your `CRUSH Map`_.
+ ceph-deploy new {ceph-node}
+ ceph-deploy new ceph-node1
-Install Ceph
-============
+ Check the output of ``ceph-deploy`` with ``ls`` and ``cat`` in the current
+ directory. You should see a Ceph configuration file, a keyring, and a log
+ file for the new cluster. See `ceph-deploy new -h`_ for additional details.
-To install Ceph on your server node, open a command line on your admin
-node and type the following::
+#. Install Ceph. ::
- ceph-deploy install {server-node-name}[,{server-node-name}]
- ceph-deploy install mon-ceph-node
+ ceph-deploy install {ceph-node}[{ceph-node} ...]
+ ceph-deploy install ceph-node1 ceph-node2 ceph-node3
-Without additional arguments, ``ceph-deploy`` will install the most recent
-stable Ceph package to the server node. See `ceph-deploy install -h`_ for
-additional details.
-.. tip:: When ``ceph-deploy`` completes installation successfully,
- it should echo ``OK``.
+#. Add a Ceph Monitor. ::
+ ceph-deploy mon create {ceph-node}
+ ceph-deploy mon create ceph-node1
+
+#. Gather keys. ::
-Add a Monitor
-=============
+ ceph-deploy gatherkeys {ceph-node}
+ ceph-deploy gatherkeys ceph-node1
-To run a Ceph cluster, you need at least one Ceph Monitor. When using
-``ceph-deploy``, the tool enforces a single Ceph Monitor per node. Execute the
-following to create a Ceph Monitor::
+ Once you have gathered keys, your local directory should have the following
+ keyrings:
- ceph-deploy mon create {mon-server-name}
- ceph-deploy mon create mon-ceph-node
+ - ``{cluster-name}.client.admin.keyring``
+ - ``{cluster-name}.bootstrap-osd.keyring``
+ - ``{cluster-name}.bootstrap-mds.keyring``
+
-.. tip:: In production environments, we recommend running Ceph Monitors on
- nodes that do not run OSDs.
+#. Add two OSDs. For fast setup, this quick start uses a directory rather
+ than an entire disk per Ceph OSD Daemon. See `ceph-deploy osd`_ for
+ details on using separate disks/partitions for OSDs and journals.
+ Log in to the Ceph Nodes and create a directory for
+ the Ceph OSD Daemon. ::
+
+ ssh ceph-node2
+ sudo mkdir /tmp/osd0
+ exit
+
+ ssh ceph-node3
+ sudo mkdir /tmp/osd1
+ exit
-When you have added a monitor successfully, directories under ``/var/lib/ceph``
-on your server node should have subdirectories ``bootstrap-mds`` and
-``bootstrap-osd`` that contain keyrings. If these directories do not contain
-keyrings, execute ``ceph-deploy mon create`` again on the admin node.
+ Then, from your admin node, use ``ceph-deploy`` to prepare the OSDs. ::
+ ceph-deploy osd prepare {ceph-node}:/path/to/directory
+ ceph-deploy osd prepare ceph-node2:/tmp/osd0 ceph-node3:/tmp/osd1
-Gather Keys
-===========
+ Finally, activate the OSDs. ::
-To deploy additional daemons and provision them with monitor authentication keys
-from your admin node, you must first gather keys from a monitor node. Execute
-the following to gather keys::
+ ceph-deploy osd activate {ceph-node}:/path/to/directory
+ ceph-deploy osd activate ceph-node2:/tmp/osd0 ceph-node3:/tmp/osd1
- ceph-deploy gatherkeys {mon-server-name}
- ceph-deploy gatherkeys mon-ceph-node
+#. Use ``ceph-deploy`` to copy the configuration file and admin key to
+ your admin node and your Ceph Nodes so that you can use the ``ceph``
+ CLI without having to specify the monitor address and
+ ``ceph.client.admin.keyring`` each time you execute a command. ::
+
+ ceph-deploy admin {ceph-node}
+ ceph-deploy admin admin-node ceph-node1 ceph-node2 ceph-node3
-Once you have gathered keys, your local directory should have the following keyrings:
+ **Note:** Since you are using ``ceph-deploy`` to talk to the
+ local host, your host must be reachable by its hostname
+ (e.g., you can modify ``/etc/hosts`` if necessary). Ensure that
+ you have the correct permissions for the ``ceph.client.admin.keyring``.
-- ``{cluster-name}.client.admin.keyring``
-- ``{cluster-name}.bootstrap-osd.keyring``
-- ``{cluster-name}.bootstrap-mds.keyring``
+#. Check your cluster's health. ::
-If you don't have these keyrings, you may not have created a monitor successfully,
-or you may have a problem with your network connection. Ensure that you complete
-this step such that you have the foregoing keyrings before proceeding further.
+ ceph health
-.. tip:: You may repeat this procedure. If it fails, check to see if the
- ``/var/lib/ceph/boostrap-{osd}|{mds}`` directories on the server node
- have keyrings. If they do not have keyrings, try adding the monitor again;
- then, return to this step.
+ Your cluster should return an ``active + clean`` state when it
+ has finished peering.
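   When the cluster is healthy, ``ceph health`` typically reports::

       HEALTH_OK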
-Add Ceph OSD Daemons
-====================
+Operating Your Cluster
+======================
-For a cluster's object placement groups to reach an ``active + clean`` state,
-you must have at least two instances of a :term:`Ceph OSD Daemon` running and
-at least two copies of an object (``osd pool default size`` is ``2``
-by default).
+Deploying a Ceph cluster with ``ceph-deploy`` automatically starts the cluster.
+To operate the cluster daemons with Debian/Ubuntu distributions, see
+`Running Ceph with Upstart`_. To operate the cluster daemons with CentOS,
+Red Hat, Fedora, and SLES distributions, see `Running Ceph with sysvinit`_.
-Adding Ceph OSD Daemons is slightly more involved than other ``ceph-deploy``
-commands, because a Ceph OSD Daemon involves both a data store and a journal.
-The ``ceph-deploy`` tool has the ability to invoke ``ceph-disk-prepare`` to
-prepare the disk and activate the Ceph OSD Daemon for you.
+To learn more about peering and cluster health, see `Monitoring a Cluster`_.
+To learn more about Ceph OSD Daemon and placement group health, see
+`Monitoring OSDs and PGs`_.
+
+Once you deploy a Ceph cluster, you can try out some of the administration
+functionality, the ``rados`` object store command line, and then proceed to
+Quick Start guides for Ceph Block Device, Ceph Filesystem, and the Ceph Object
+Gateway.
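For example (a sketch only; the exact service interface depends on your
distribution), you can list or query the running daemons with::

    sudo initctl list | grep ceph       # Upstart (Debian/Ubuntu)
    sudo /etc/init.d/ceph status        # sysvinit (CentOS/RHEL/Fedora/SLES)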
-Multiple OSDs on the OS Disk (Demo Only)
-----------------------------------------
-For demonstration purposes, you may wish to add multiple OSDs to the OS disk
-(not recommended for production systems). To use Ceph OSDs daemons on the OS
-disk, you must use ``prepare`` and ``activate`` as separate steps. First,
-define a directory for the Ceph OSD daemon(s). ::
-
- mkdir /tmp/osd0
- mkdir /tmp/osd1
-
-Then, use ``prepare`` to prepare the directory(ies) for use with a
-Ceph OSD Daemon. ::
-
- ceph-deploy osd prepare {osd-node-name}:/tmp/osd0
- ceph-deploy osd prepare {osd-node-name}:/tmp/osd1
+Expanding Your Cluster
+======================
-Finally, use ``activate`` to activate the Ceph OSD Daemons. ::
+Once you have a basic cluster up and running, the next step is to expand the
+cluster. Add a Ceph OSD Daemon and a Ceph Metadata Server to ``ceph-node1``.
+Then add a Ceph Monitor to ``ceph-node2`` and ``ceph-node3`` to establish a
+quorum of Ceph Monitors.
- ceph-deploy osd activate {osd-node-name}:/tmp/osd0
- ceph-deploy osd activate {osd-node-name}:/tmp/osd1
+.. ditaa::
+ /------------------\ /----------------\
+ | ceph–deploy | | ceph–node1 |
+ | Admin Node | | cCCC |
+ | +-------->+ mon.ceph–node1 |
+ | | | osd.2 |
+ | | | mds.ceph–node1 |
+ \---------+--------/ \----------------/
+ |
+ | /----------------\
+ | | ceph–node2 |
+ | | cCCC |
+ +----------------->+ |
+ | | osd.0 |
+ | | mon.ceph–node2 |
+ | \----------------/
+ |
+ | /----------------\
+ | | ceph–node3 |
+ | | cCCC |
+ +----------------->+ |
+ | osd.1 |
+ | mon.ceph–node3 |
+ \----------------/
-.. tip:: You need two OSDs to reach an ``active + clean`` state. You can
- add one OSD at a time, but OSDs need to communicate with each other
- for Ceph to run properly. Always use more than one OSD per cluster.
+Adding an OSD
+-------------
+Since you are running a 3-node cluster for demonstration purposes, add the OSD
+to the monitor node. ::
-List Disks
-----------
+ ssh ceph-node1
+ sudo mkdir /tmp/osd2
+ exit
-To list the available disk drives on a prospective :term:`Ceph Node`, execute
-the following::
+Then, from your ``ceph-deploy`` node, prepare the OSD. ::
- ceph-deploy disk list {osd-node-name}
- ceph-deploy disk list ceph-node
+ ceph-deploy osd prepare {ceph-node}:/path/to/directory
+ ceph-deploy osd prepare ceph-node1:/tmp/osd2
+Finally, activate the OSD. ::
-Zap a Disk
-----------
+ ceph-deploy osd activate {ceph-node}:/path/to/directory
+ ceph-deploy osd activate ceph-node1:/tmp/osd2
-To zap a disk (delete its partition table) in preparation for use with Ceph,
-execute the following::
- ceph-deploy disk zap {osd-node-name}:{disk}
- ceph-deploy disk zap ceph-node:sdb ceph-node:sdb2
+Once you have added your new OSD, Ceph will begin rebalancing the cluster by
+migrating placement groups to your new OSD. You can observe this process with
+the ``ceph`` CLI. ::
-.. important:: This will delete all data on the disk.
+ ceph -w
+You should see the placement group states change from ``active+clean`` to
+``active`` with some degraded objects, and then return to ``active+clean``
+when the migration completes. (Press Control-C to exit.)
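+
+You can also confirm that the new OSD appears in the CRUSH hierarchy and is
+``up`` and ``in``::
+
+   ceph osd tree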
-Add OSDs on Standalone Disks
-----------------------------
-You can add OSDs using ``prepare`` and ``activate`` in two discrete
-steps. To prepare a disk for use with a Ceph OSD Daemon, execute the
-following::
+Add a Metadata Server
+---------------------
- ceph-deploy osd prepare {osd-node-name}:{osd-disk-name}[:/path/to/journal]
- ceph-deploy osd prepare ceph-node:sdb
+To use CephFS, you need at least one metadata server. Execute the following to
+create a metadata server::
-To activate the Ceph OSD Daemon, execute the following::
+ ceph-deploy mds create {ceph-node}
+ ceph-deploy mds create ceph-node1
- ceph-deploy osd activate {osd-node-name}:{osd-partition-name}
- ceph-deploy osd activate ceph-node:sdb1
-To prepare an OSD disk and activate it in one step, execute the following::
+.. note:: Currently Ceph runs in production with one metadata server only. You
+ may use more, but there is currently no commercial support for a cluster
+ with multiple metadata servers.
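+
+To verify that the metadata server is running, check its status (the exact
+output format varies by release)::
+
+   ceph mds stat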
- ceph-deploy osd create {osd-node-name}:{osd-disk-name}[:/path/to/journal] [{osd-node-name}:{osd-disk-name}[:/path/to/journal]]
- ceph-deploy osd create ceph-node:sdb:/dev/ssd1 ceph-node:sdc:/dev/ssd2
+Adding Monitors
+---------------
-.. note:: The journal example assumes you will use a partition on a separate
- solid state drive (SSD). If you omit a journal drive or partition,
- ``ceph-deploy`` will use create a separate partition for the journal
- on the same drive. If you have already formatted your disks and created
- partitions, you may also use partition syntax for your OSD disk.
+A Ceph Storage Cluster requires at least one Ceph Monitor to run. For high
+availability, Ceph Storage Clusters typically run multiple Ceph Monitors so
+that the failure of a single Ceph Monitor will not bring down the Ceph Storage
+Cluster. Ceph uses the Paxos algorithm, which requires a majority of the
+monitors (e.g., 1 out of 1, 2 out of 3, 3 out of 4, 3 out of 5, 4 out of 6,
+and so on) to form a quorum.
-You must add a minimum of two Ceph OSD Daemons for the placement groups in
-a cluster to achieve an ``active + clean`` state.
+Add two Ceph Monitors to your cluster. ::
+ ceph-deploy mon create {ceph-node}
+ ceph-deploy mon create ceph-node2 ceph-node3
-Add a MDS
-=========
+Once you have added your new Ceph Monitors, Ceph will begin synchronizing
+the monitors and form a quorum. You can check the quorum status by executing
+the following::
-To use CephFS, you need at least one metadata node. Execute the following to
-create a metadata node::
+ ceph quorum_status
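+
+The output lists the monitors currently in the quorum. For a more readable
+listing, you can also request formatted output (supported by recent releases)::
+
+   ceph quorum_status --format json-pretty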
- ceph-deploy mds create {node-name}
- ceph-deploy mds create ceph-node
-.. note:: Currently Ceph runs in production with one metadata node only. You
- may use more, but there is currently no commercial support for a cluster
- with multiple metadata nodes.
+Storing/Retrieving Object Data
+==============================
+To store object data in the Ceph Storage Cluster, a Ceph client must:
-Summary
-=======
+#. Set an object name
+#. Specify a `pool`_
-Deploying a Ceph cluster with ``ceph-deploy`` automatically starts the cluster.
-To operate the cluster daemons, see `Running Ceph with Upstart`_.
+The Ceph Client retrieves the latest cluster map, and the CRUSH algorithm
+calculates how to map the object to a `placement group`_ and then how to
+assign the placement group to a Ceph OSD Daemon dynamically. To find the
+object location, all you need is the object name and the pool name. For
+example::
-Once you deploy a Ceph cluster, you can try out some of the administration
-functionality, the object store command line, and then proceed to Quick Start
-guides for RBD, CephFS, and the Ceph Gateway.
+ ceph osd map {poolname} {object-name}
-.. topic:: Other ceph-deploy Commands
+.. topic:: Exercise: Locate an Object
- To view other ``ceph-deploy`` commands, execute:
-
- ``ceph-deploy -h``
-
+   As an exercise, let's create an object. Specify an object name, a path to
+   a test file containing some object data, and a pool name using the
+ ``rados put`` command on the command line. For example::
+
+ rados put {object-name} {file-path} --pool=data
+ rados put test-object-1 testfile.txt --pool=data
+
+ To verify that the Ceph Storage Cluster stored the object, execute
+ the following::
+
+ rados -p data ls
+
+ Now, identify the object location::
-See `Ceph Deploy`_ for additional details.
+ ceph osd map {pool-name} {object-name}
+ ceph osd map data test-object-1
+
+ Ceph should output the object's location. For example::
+
+ osdmap e537 pool 'data' (0) object 'test-object-1' -> pg 0.d1743484 (0.4) -> up [1,0] acting [1,0]
+
+ To remove the test object, simply delete it using the ``rados rm``
+ command. For example::
+
+ rados rm test-object-1 --pool=data
+
+As the cluster evolves, the object location may change dynamically. One benefit
+of Ceph's dynamic rebalancing is that Ceph relieves you from having to perform
+the migration manually.
.. _Preflight Checklist: ../quick-start-preflight
.. _Ceph Deploy: ../../rados/deployment
.. _ceph-deploy install -h: ../../rados/deployment/ceph-deploy-install
.. _ceph-deploy new -h: ../../rados/deployment/ceph-deploy-new
+.. _ceph-deploy osd: ../../rados/deployment/ceph-deploy-osd
.. _Running Ceph with Upstart: ../../rados/operations/operating#running-ceph-with-upstart
-.. _CRUSH Map: ../../rados/operations/crush-map \ No newline at end of file
+.. _Running Ceph with sysvinit: ../../rados/operations/operating#running-ceph-with-sysvinit
+.. _CRUSH Map: ../../rados/operations/crush-map
+.. _pool: ../../rados/operations/pools
+.. _placement group: ../../rados/operations/placement-groups
+.. _Monitoring a Cluster: ../../rados/operations/monitoring
+.. _Monitoring OSDs and PGs: ../../rados/operations/monitoring-osd-pg \ No newline at end of file
diff --git a/doc/start/quick-cephfs.rst b/doc/start/quick-cephfs.rst
index 18dadb005ec..5449e5a6fe3 100644
--- a/doc/start/quick-cephfs.rst
+++ b/doc/start/quick-cephfs.rst
@@ -3,7 +3,7 @@
=====================
To use the :term:`Ceph FS` Quick Start guide, you must have executed the
-procedures in the `Ceph Deploy Quick Start`_ guide first. Execute this quick
+procedures in the `Storage Cluster Quick Start`_ guide first. Execute this quick
start on the Admin Host.
Prerequisites
@@ -91,7 +91,7 @@ See `Ceph FS`_ for additional information. Ceph FS is not quite as stable
as the Ceph Block Device and Ceph Object Storage. See `Troubleshooting`_
if you encounter trouble.
-.. _Ceph Deploy Quick Start: ../quick-ceph-deploy
+.. _Storage Cluster Quick Start: ../quick-ceph-deploy
.. _Ceph FS: ../../cephfs/
.. _FAQ: http://wiki.ceph.com/03FAQs/01General_FAQ#How_Can_I_Give_Ceph_a_Try.3F
.. _Troubleshooting: ../../cephfs/troubleshooting \ No newline at end of file
diff --git a/doc/start/quick-rbd.rst b/doc/start/quick-rbd.rst
index a466771502d..9424457f8c2 100644
--- a/doc/start/quick-rbd.rst
+++ b/doc/start/quick-rbd.rst
@@ -2,47 +2,73 @@
Block Device Quick Start
==========================
-To use this guide, you must have executed the procedures in the `Object Store
-Quick Start`_ guide first. Ensure your :term:`Ceph Storage Cluster` is in an
-``active + clean`` state before working with the :term:`Ceph Block Device`.
-Execute this quick start on the admin node.
+To use this guide, you must have executed the procedures in the `Storage
+Cluster Quick Start`_ guide first. Ensure your :term:`Ceph Storage Cluster` is
+in an ``active + clean`` state before working with the :term:`Ceph Block
+Device`.
.. note:: The Ceph Block Device is also known as :term:`RBD` or :term:`RADOS`
Block Device.
-#. Install ``ceph-common``. ::
- sudo apt-get install ceph-common
+.. ditaa::
+ /------------------\ /----------------\
+ | Admin Node | | ceph–client |
+ | +-------->+ cCCC |
+ | ceph–deploy | | ceph |
+ \------------------/ \----------------/
-#. Create a block device image. ::
- rbd create foo --size 4096 [-m {mon-IP}] [-k /path/to/ceph.client.admin.keyring]
+You may use a virtual machine for your ``ceph-client`` node, but do not
+execute the following procedures directly on a Ceph Storage Cluster node
+(running the client in a VM hosted on a cluster node is fine). See `FAQ`_
+for details.
-#. Load the ``rbd`` client module. ::
+
+Install Ceph
+============
+
+#. On the admin node, use ``ceph-deploy`` to install Ceph on your
+ ``ceph-client`` node. ::
+
+ ceph-deploy install ceph-client
+
+#. On the admin node, use ``ceph-deploy`` to copy the Ceph configuration file
+ and the ``ceph.client.admin.keyring`` to the ``ceph-client``. ::
+
+ ceph-deploy admin ceph-client
+
+
+Configure a Block Device
+========================
+
+#. On the ``ceph-client`` node, create a block device image. ::
+
+ rbd create foo --size 4096 [-m {mon-IP}] [-k /path/to/ceph.client.admin.keyring]
+
+#. On the ``ceph-client`` node, load the ``rbd`` client module. ::
sudo modprobe rbd
-#. Map the image to a block device. ::
+#. On the ``ceph-client`` node, map the image to a block device. ::
sudo rbd map foo --pool rbd --name client.admin [-m {mon-IP}] [-k /path/to/ceph.client.admin.keyring]
-#. Use the block device. In the following example, create a file system. ::
+#. Use the block device by creating a file system on the ``ceph-client``
+ node. ::
sudo mkfs.ext4 -m0 /dev/rbd/rbd/foo
This may take a few moments.
-#. Mount the file system. ::
+#. Mount the file system on the ``ceph-client`` node. ::
sudo mkdir /mnt/ceph-block-device
sudo mount /dev/rbd/rbd/foo /mnt/ceph-block-device
cd /mnt/ceph-block-device
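+
+#. (Optional) Verify that the mounted file system is writable by creating a
+   small test file and checking usage (``testfile`` is just an example name)::
+
+     sudo dd if=/dev/zero of=/mnt/ceph-block-device/testfile bs=1M count=10
+     df -h /mnt/ceph-block-device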
-.. note:: Mount the block device on the client machine,
- not the server machine. See `FAQ`_ for details.
See `block devices`_ for additional details.
-.. _Object Store Quick Start: ../quick-ceph-deploy
+.. _Storage Cluster Quick Start: ../quick-ceph-deploy
.. _block devices: ../../rbd/rbd
.. _FAQ: http://wiki.ceph.com/03FAQs/01General_FAQ#How_Can_I_Give_Ceph_a_Try.3F
diff --git a/doc/start/quick-rgw.rst b/doc/start/quick-rgw.rst
index af48a3154c1..40cf7d4f4dc 100644
--- a/doc/start/quick-rgw.rst
+++ b/doc/start/quick-rgw.rst
@@ -2,7 +2,7 @@
Object Storage Quick Start
============================
-To use this guide, you must have executed the procedures in the `Ceph Deploy
+To use this guide, you must have executed the procedures in the `Storage Cluster
Quick Start`_ guide first. Ensure your :term:`Ceph Storage Cluster` is in an
``active + clean`` state before working with the :term:`Ceph Object Storage`.
@@ -344,7 +344,7 @@ tutorials. See the `S3-compatible`_ and `Swift-compatible`_ APIs for details.
.. _Create rgw.conf: ../../radosgw/config/index.html#create-rgw-conf
-.. _Ceph Deploy Quick Start: ../quick-ceph-deploy
+.. _Storage Cluster Quick Start: ../quick-ceph-deploy
.. _Ceph Object Storage Manual Install: ../../radosgw/manual-install
.. _RGW Configuration: ../../radosgw/config
.. _S3-compatible: ../../radosgw/s3
diff --git a/doc/start/quick-start-preflight.rst b/doc/start/quick-start-preflight.rst
index 74dc403c211..77a54795f19 100644
--- a/doc/start/quick-start-preflight.rst
+++ b/doc/start/quick-start-preflight.rst
@@ -4,74 +4,57 @@
.. versionadded:: 0.60
-Thank you for trying Ceph! Petabyte-scale data clusters are quite an
-undertaking. Before delving deeper into Ceph, we recommend setting up a two-node
-demo cluster to explore some of the functionality. This **Preflight Checklist**
-will help you prepare an admin node and a server node for use with
-``ceph-deploy``.
-
-.. ditaa::
- /----------------\ /----------------\
- | Admin Node |<------->| Server Node |
- | cCCC | | cCCC |
- \----------------/ \----------------/
-
-
-Before you can deploy Ceph using ``ceph-deploy``, you need to ensure that you
-have a few things set up first on your admin node and on nodes running Ceph
-daemons.
-
-
-Install an Operating System
-===========================
-
-Install a recent release of Debian or Ubuntu (e.g., 12.04, 12.10, 13.04) on your
-nodes. For additional details on operating systems or to use other operating
-systems other than Debian or Ubuntu, see `OS Recommendations`_.
-
-
-Install an SSH Server
-=====================
-
-The ``ceph-deploy`` utility requires ``ssh``, so your server node(s) require an
-SSH server. ::
-
- sudo apt-get install openssh-server
-
-
-Create a User
-=============
-
-Create a user on nodes running Ceph daemons.
-
-.. tip:: We recommend a username that brute force attackers won't
- guess easily (e.g., something other than ``root``, ``ceph``, etc).
-
-::
+Thank you for trying Ceph! We recommend setting up a ``ceph-deploy`` admin node
+and a 3-node :term:`Ceph Storage Cluster` to explore the basics of Ceph. This
+**Preflight Checklist** will help you prepare a ``ceph-deploy`` admin node and
+three Ceph Nodes (or virtual machines) that will host your Ceph Storage Cluster.
+
+
+.. ditaa::
+ /------------------\ /----------------\
+ | Admin Node | | ceph–node1 |
+ | +-------->+ |
+ | ceph–deploy | | cCCC |
+ \---------+--------/ \----------------/
+ |
+ | /----------------\
+ | | ceph–node2 |
+ +----------------->+ |
+ | | cCCC |
+ | \----------------/
+ |
+ | /----------------\
+ | | ceph–node3 |
+ +----------------->| |
+ | cCCC |
+ \----------------/
+
+
+Ceph Node Setup
+===============
+
+Perform the following steps:
+
+#. Create a user on each Ceph Node. ::
ssh user@ceph-server
sudo useradd -d /home/ceph -m ceph
sudo passwd ceph
-
-``ceph-deploy`` installs packages onto your nodes. This means that
-the user you create requires passwordless ``sudo`` privileges.
-
-.. note:: We **DO NOT** recommend enabling the ``root`` password
- for security reasons.
-
-To provide full privileges to the user, add the following to
-``/etc/sudoers.d/ceph``. ::
+#. Add ``root`` privileges for the user on each Ceph Node. ::
echo "ceph ALL = (root) NOPASSWD:ALL" | sudo tee /etc/sudoers.d/ceph
sudo chmod 0440 /etc/sudoers.d/ceph
-Configure SSH
-=============
+#. Install an SSH server (if necessary)::
-Configure your admin machine with password-less SSH access to each node
-running Ceph daemons (leave the passphrase empty). ::
+ sudo apt-get install openssh-server
+ sudo yum install openssh-server
+
+
+#. Configure your ``ceph-deploy`` admin node with password-less SSH access to
+ each Ceph Node. Leave the passphrase empty::
ssh-keygen
Generating public/private key pair.
@@ -81,77 +64,95 @@ running Ceph daemons (leave the passphrase empty). ::
Your identification has been saved in /ceph-client/.ssh/id_rsa.
Your public key has been saved in /ceph-client/.ssh/id_rsa.pub.
-Copy the key to each node running Ceph daemons::
+#. Copy the key to each Ceph Node. ::
ssh-copy-id ceph@ceph-server
-Modify your ~/.ssh/config file of your admin node so that it defaults
-to logging in as the user you created when no username is specified. ::
+
+#. Modify the ``~/.ssh/config`` file of your ``ceph-deploy`` admin node so that
+ it logs in to Ceph Nodes as the user you created (e.g., ``ceph``). ::
Host ceph-server
- Hostname ceph-server.fqdn-or-ip-address.com
- User ceph
+ Hostname ceph-server.fqdn-or-ip-address.com
+ User ceph
+
+
+#. Ensure connectivity using ``ping`` with hostnames (i.e., not IP addresses).
+ Address hostname resolution issues and firewall issues as necessary.
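+
+   For example, from the admin node::
+
+     ping ceph-node1
+     ping ceph-node2
+     ping ceph-node3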
-.. note:: Do not call ceph-deploy with ``sudo`` or run as ``root`` if you are
- login in as a different user (as in the ssh config above) because it
- will not issue ``sudo`` commands needed on the remote host.
-Install ceph-deploy
-===================
+Ceph Deploy Setup
+=================
-To install ``ceph-deploy``, execute the following::
+Add Ceph repositories to the ``ceph-deploy`` admin node. Then, install
+``ceph-deploy``.
+
+.. important:: Do not call ``ceph-deploy`` with ``sudo`` or run it as ``root``
+ if you are logged in as a different user, because it will not issue ``sudo``
+ commands needed on the remote host.
+
+
+Advanced Package Tool (APT)
+---------------------------
+
+For Debian and Ubuntu distributions, perform the following steps:
+
+#. Add the release key::
wget -q -O- 'https://ceph.com/git/?p=ceph.git;a=blob_plain;f=keys/release.asc' | sudo apt-key add -
echo deb http://ceph.com/debian-dumpling/ $(lsb_release -sc) main | sudo tee /etc/apt/sources.list.d/ceph.list
sudo apt-get update
sudo apt-get install ceph-deploy
+#. Add the Ceph packages to your repository. Replace ``{ceph-stable-release}``
+ with a stable Ceph release (e.g., ``cuttlefish``, ``dumpling``, etc.).
+ For example::
+
+ echo deb http://ceph.com/debian-{ceph-stable-release}/ $(lsb_release -sc) main | sudo tee /etc/apt/sources.list.d/ceph.list
-Ensure Connectivity
-===================
+#. Update your repository and install ``ceph-deploy``::
-Ensure that your admin node has connectivity to the network and to your Server
-node (e.g., ensure ``iptables``, ``ufw`` or other tools that may prevent
-connections, traffic forwarding, etc. to allow what you need).
+ sudo apt-get update && sudo apt-get install ceph-deploy
-.. tip:: The ``ceph-deploy`` tool is new and you may encounter some issues
- without effective error messages.
-Once you have completed this pre-flight checklist, you are ready to begin using
-``ceph-deploy``.
+Red Hat Package Manager (RPM)
+-----------------------------
+For Red Hat (rhel6), CentOS (el6), Fedora 17-19 (f17-f19), OpenSUSE 12
+(opensuse12), and SLES (sles11), perform the following steps:
-Hostname Resolution
-===================
+#. Add the Ceph package repository. Open a text editor and create a
+   Yellowdog Updater, Modified (YUM) repository entry at the file path
+   ``/etc/yum.repos.d/ceph.repo``. For example::
-Ensure that your admin node can resolve the server node's hostname. ::
+ sudo vim /etc/yum.repos.d/ceph.repo
- ping {server-node}
+ Paste the following example code. Replace ``{ceph-stable-release}`` with
+   the most recent stable release of Ceph (e.g., ``dumpling``). Replace ``{distro}``
+ with your Linux distribution (e.g., ``el6`` for CentOS 6, ``rhel6`` for
+ Red Hat 6, ``fc18`` or ``fc19`` for Fedora 18 or Fedora 19, and ``sles11``
+ for SLES 11). Finally, save the contents to the
+ ``/etc/yum.repos.d/ceph.repo`` file. ::
-If you execute ``ceph-deploy`` against the localhost, ``ceph-deploy``
-must be able to resolve its IP address. Consider adding the IP address
-to your ``/etc/hosts`` file such that it resolves to the hostname. ::
+ [ceph-noarch]
+ name=Ceph noarch packages
+ baseurl=http://ceph.com/rpm-{ceph-stable-release}/{distro}/noarch
+ enabled=1
+ gpgcheck=1
+ type=rpm-md
+ gpgkey=https://ceph.com/git/?p=ceph.git;a=blob_plain;f=keys/release.asc
- hostname
- host -4 {hostname}
- sudo vim /etc/hosts
- {ip-address} {hostname}
+#. Update your repository and install ``ceph-deploy``::
- ceph-deploy {command} {hostname}
+ sudo yum update && sudo yum install ceph-deploy
-.. tip:: The ``ceph-deploy`` tool will not resolve to ``localhost``. Use
- the hostname.
Summary
=======
-Once you have passwordless ``ssh`` connectivity, passwordless ``sudo``,
-installed ``ceph-deploy``, and you have ensured appropriate connectivity,
-proceed to the `Storage Cluster Quick Start`_.
-
-.. tip:: The ``ceph-deploy`` utility can install Ceph packages on remote
- machines from the admin node!
+This completes the Quick Start Preflight. Proceed to the `Storage Cluster
+Quick Start`_.
.. _Storage Cluster Quick Start: ../quick-ceph-deploy
.. _OS Recommendations: ../../install/os-recommendations
diff --git a/qa/workunits/cephtool/test.sh b/qa/workunits/cephtool/test.sh
index 09e55b9a842..f0fa37893b1 100755
--- a/qa/workunits/cephtool/test.sh
+++ b/qa/workunits/cephtool/test.sh
@@ -147,7 +147,9 @@ ceph mds newfs 0 1 --yes-i-really-mean-it
ceph osd pool create data2 10
poolnum=$(ceph osd dump | grep 'pool.*data2' | awk '{print $2;}')
ceph mds add_data_pool $poolnum
+ceph mds add_data_pool rbd
ceph mds remove_data_pool $poolnum
+ceph mds remove_data_pool rbd
ceph osd pool delete data2 data2 --yes-i-really-really-mean-it
ceph mds set_max_mds 4
ceph mds set_max_mds 3
@@ -325,6 +327,9 @@ ceph osd pool set data size 3
ceph osd pool get data size | grep 'size: 3'
ceph osd pool set data size 2
+ceph osd pool set data hashpspool true
+ceph osd pool set data hashpspool false
+
ceph osd pool get rbd crush_ruleset | grep 'crush_ruleset: 2'
ceph osd thrash 10
diff --git a/qa/workunits/misc/mkpool_layout_vxattrs.sh b/qa/workunits/misc/mkpool_layout_vxattrs.sh
index 16b3cdfe517..91d31664898 100755
--- a/qa/workunits/misc/mkpool_layout_vxattrs.sh
+++ b/qa/workunits/misc/mkpool_layout_vxattrs.sh
@@ -4,10 +4,12 @@ set -e
touch foo.$$
rados mkpool foo.$$
-poolid=$(ceph osd dump | grep "^pool" | awk '{print $2}' | tail -n 1)
-ceph mds add_data_pool ${poolid}
+ceph mds add_data_pool foo.$$
setfattr -n ceph.file.layout.pool -v foo.$$ foo.$$
# cleanup
-rados rmpool foo.$$ foo.$$ --yes-i-really-really-mean-it
rm foo.$$
+ceph mds remove_data_pool foo.$$
+rados rmpool foo.$$ foo.$$ --yes-i-really-really-mean-it
+
+echo OK
diff --git a/qa/workunits/suites/fsstress.sh b/qa/workunits/suites/fsstress.sh
index 7f945172687..394e5fad991 100755
--- a/qa/workunits/suites/fsstress.sh
+++ b/qa/workunits/suites/fsstress.sh
@@ -2,6 +2,7 @@
if [ ! -f /usr/lib/ltp/testcases/bin/fsstress ]
then
+ path=`pwd`
mkdir -p /tmp/fsstress
cd /tmp/fsstress
wget -q -O /tmp/fsstress/ltp-full.tgz http://ceph.com/qa/ltp-full-20091231.tgz
@@ -13,6 +14,7 @@ then
sudo cp -avf /tmp/fsstress/ltp-full-20091231/testcases/kernel/fs/fsstress/fsstress /usr/lib/ltp/testcases/bin/fsstress
sudo chmod 755 /usr/lib/ltp/testcases/bin/fsstress
rm -Rf /tmp/fsstress
+ cd $path
fi
command="/usr/lib/ltp/testcases/bin/fsstress -d fsstress-`hostname`$$ -l 1 -n 1000 -p 10 -v"
diff --git a/src/cls/rgw/cls_rgw_client.cc b/src/cls/rgw/cls_rgw_client.cc
index 165ca437987..2851f2bd702 100644
--- a/src/cls/rgw/cls_rgw_client.cc
+++ b/src/cls/rgw/cls_rgw_client.cc
@@ -2,6 +2,7 @@
#include "include/types.h"
#include "cls/rgw/cls_rgw_ops.h"
+#include "cls/rgw/cls_rgw_client.h"
#include "include/rados/librados.hpp"
#include "common/debug.h"
@@ -157,6 +158,44 @@ int cls_rgw_get_dir_header(IoCtx& io_ctx, string& oid, rgw_bucket_dir_header *he
return r;
}
+class GetDirHeaderCompletion : public ObjectOperationCompletion {
+ RGWGetDirHeader_CB *ret_ctx;
+public:
+ GetDirHeaderCompletion(RGWGetDirHeader_CB *_ctx) : ret_ctx(_ctx) {}
+ ~GetDirHeaderCompletion() {
+ ret_ctx->put();
+ }
+ void handle_completion(int r, bufferlist& outbl) {
+ struct rgw_cls_list_ret ret;
+ try {
+ bufferlist::iterator iter = outbl.begin();
+ ::decode(ret, iter);
+ } catch (buffer::error& err) {
+ r = -EIO;
+ }
+
+ ret_ctx->handle_response(r, ret.dir.header);
+ };
+};
+
+int cls_rgw_get_dir_header_async(IoCtx& io_ctx, string& oid, RGWGetDirHeader_CB *ctx)
+{
+ bufferlist in, out;
+ struct rgw_cls_list_op call;
+ call.num_entries = 0;
+ ::encode(call, in);
+ ObjectReadOperation op;
+ GetDirHeaderCompletion *cb = new GetDirHeaderCompletion(ctx);
+ op.exec("rgw", "bucket_list", in, cb);
+ AioCompletion *c = librados::Rados::aio_create_completion(NULL, NULL, NULL);
+ int r = io_ctx.aio_operate(oid, c, &op, NULL);
+ c->release();
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
int cls_rgw_bi_log_list(IoCtx& io_ctx, string& oid, string& marker, uint32_t max,
list<rgw_bi_log_entry>& entries, bool *truncated)
{
diff --git a/src/cls/rgw/cls_rgw_client.h b/src/cls/rgw/cls_rgw_client.h
index 2ea5d9ca771..39bb3c9fc4a 100644
--- a/src/cls/rgw/cls_rgw_client.h
+++ b/src/cls/rgw/cls_rgw_client.h
@@ -4,6 +4,13 @@
#include "include/types.h"
#include "include/rados/librados.hpp"
#include "cls_rgw_types.h"
+#include "common/RefCountedObj.h"
+
+class RGWGetDirHeader_CB : public RefCountedObject {
+public:
+ virtual ~RGWGetDirHeader_CB() {}
+ virtual void handle_response(int r, rgw_bucket_dir_header& header) = 0;
+};
/* bucket index */
void cls_rgw_bucket_init(librados::ObjectWriteOperation& o);
@@ -27,6 +34,7 @@ int cls_rgw_bucket_check_index_op(librados::IoCtx& io_ctx, string& oid,
int cls_rgw_bucket_rebuild_index_op(librados::IoCtx& io_ctx, string& oid);
int cls_rgw_get_dir_header(librados::IoCtx& io_ctx, string& oid, rgw_bucket_dir_header *header);
+int cls_rgw_get_dir_header_async(librados::IoCtx& io_ctx, string& oid, RGWGetDirHeader_CB *ctx);
void cls_rgw_encode_suggestion(char op, rgw_bucket_dir_entry& dirent, bufferlist& updates);
diff --git a/src/common/Formatter.h b/src/common/Formatter.h
index 27089ce04f2..ac68b7f461d 100644
--- a/src/common/Formatter.h
+++ b/src/common/Formatter.h
@@ -44,6 +44,9 @@ class Formatter {
virtual void dump_int(const char *name, int64_t s) = 0;
virtual void dump_float(const char *name, double d) = 0;
virtual void dump_string(const char *name, std::string s) = 0;
+ virtual void dump_bool(const char *name, bool b) {
+ dump_format_unquoted(name, "%s", (b ? "true" : "false"));
+ }
virtual std::ostream& dump_stream(const char *name) = 0;
virtual void dump_format(const char *name, const char *fmt, ...) = 0;
virtual void dump_format_unquoted(const char *name, const char *fmt, ...) = 0;
diff --git a/src/common/common_init.cc b/src/common/common_init.cc
index ef8cf010072..8fb688cd8d3 100644
--- a/src/common/common_init.cc
+++ b/src/common/common_init.cc
@@ -73,8 +73,11 @@ CephContext *common_preinit(const CephInitParameters &iparams,
break;
}
- if ((flags & CINIT_FLAG_UNPRIVILEGED_DAEMON_DEFAULTS) ||
- code_env != CODE_ENVIRONMENT_DAEMON) {
+ if (flags & CINIT_FLAG_UNPRIVILEGED_DAEMON_DEFAULTS) {
+ // do nothing special! we used to do no default log, pid_file,
+ // admin_socket, but changed our minds. let's make ceph-fuse
+ // and radosgw use the same defaults as ceph-{osd,mon,mds,...}
+ } else if (code_env != CODE_ENVIRONMENT_DAEMON) {
// no default log, pid_file, admin_socket
conf->set_val_or_die("pid_file", "");
conf->set_val_or_die("admin_socket", "");
diff --git a/src/common/config_opts.h b/src/common/config_opts.h
index 2d3f981379b..b419dec88b5 100644
--- a/src/common/config_opts.h
+++ b/src/common/config_opts.h
@@ -203,7 +203,7 @@ OPTION(mon_leveldb_bloom_size, OPT_INT, 0) // monitor's leveldb bloom bits per e
OPTION(mon_leveldb_max_open_files, OPT_INT, 0) // monitor's leveldb max open files
OPTION(mon_leveldb_compression, OPT_BOOL, false) // monitor's leveldb uses compression
OPTION(mon_leveldb_paranoid, OPT_BOOL, false) // monitor's leveldb paranoid flag
-OPTION(mon_leveldb_log, OPT_STR, "")
+OPTION(mon_leveldb_log, OPT_STR, "/dev/null")
OPTION(mon_leveldb_size_warn, OPT_U64, 40*1024*1024*1024) // issue a warning when the monitor's leveldb goes over 40GB (in bytes)
OPTION(paxos_stash_full_interval, OPT_INT, 25) // how often (in commits) to stash a full copy of the PaxosService state
OPTION(paxos_max_join_drift, OPT_INT, 10) // max paxos iterations before we must first sync the monitor stores
@@ -492,7 +492,7 @@ OPTION(osd_leveldb_bloom_size, OPT_INT, 0) // OSD's leveldb bloom bits per entry
OPTION(osd_leveldb_max_open_files, OPT_INT, 0) // OSD's leveldb max open files
OPTION(osd_leveldb_compression, OPT_BOOL, true) // OSD's leveldb uses compression
OPTION(osd_leveldb_paranoid, OPT_BOOL, false) // OSD's leveldb paranoid flag
-OPTION(osd_leveldb_log, OPT_STR, "") // enable OSD leveldb log file
+OPTION(osd_leveldb_log, OPT_STR, "/dev/null") // enable OSD leveldb log file
// determines whether PGLog::check() compares written out log to stored log
OPTION(osd_debug_pg_log_writeout, OPT_BOOL, false)
@@ -721,6 +721,10 @@ OPTION(rgw_data_log_num_shards, OPT_INT, 128) // number of objects to keep data
OPTION(rgw_data_log_obj_prefix, OPT_STR, "data_log") //
OPTION(rgw_replica_log_obj_prefix, OPT_STR, "replica_log") //
+OPTION(rgw_bucket_quota_ttl, OPT_INT, 600) // how long bucket stats remain cached within an rgw instance
+OPTION(rgw_bucket_quota_soft_threshold, OPT_DOUBLE, 0.95) // threshold from which we don't rely on cached info for quota decisions
+OPTION(rgw_bucket_quota_cache_size, OPT_INT, 10000) // number of entries in bucket quota cache
+
OPTION(mutex_perf_counter, OPT_BOOL, false) // enable/disable mutex perf counter
// This will be set to true when it is safe to start threads.
diff --git a/src/common/lru_map.h b/src/common/lru_map.h
index 6e7f7b3786f..1e1acc95f76 100644
--- a/src/common/lru_map.h
+++ b/src/common/lru_map.h
@@ -21,41 +21,76 @@ class lru_map {
size_t max;
public:
+ class UpdateContext {
+ public:
+ virtual ~UpdateContext() {}
+
+ /* update should return true if object is updated */
+ virtual bool update(V *v) = 0;
+ };
+
+ bool _find(const K& key, V *value, UpdateContext *ctx);
+ void _add(const K& key, V& value);
+
+public:
lru_map(int _max) : lock("lru_map"), max(_max) {}
virtual ~lru_map() {}
bool find(const K& key, V& value);
+
+ /*
+ * find_and_update()
+ *
+ * - will return true if object is found
+ * - if ctx is set will return true if object is found and updated
+ */
+ bool find_and_update(const K& key, V *value, UpdateContext *ctx);
void add(const K& key, V& value);
void erase(const K& key);
};
template <class K, class V>
-bool lru_map<K, V>::find(const K& key, V& value)
+bool lru_map<K, V>::_find(const K& key, V *value, UpdateContext *ctx)
{
- lock.Lock();
typename std::map<K, entry>::iterator iter = entries.find(key);
if (iter == entries.end()) {
- lock.Unlock();
return false;
}
entry& e = iter->second;
entries_lru.erase(e.lru_iter);
- value = e.value;
+ bool r = true;
+
+ if (ctx)
+ r = ctx->update(&e.value);
+
+ if (value)
+ *value = e.value;
entries_lru.push_front(key);
e.lru_iter = entries_lru.begin();
- lock.Unlock();
+ return r;
+}
- return true;
+template <class K, class V>
+bool lru_map<K, V>::find(const K& key, V& value)
+{
+ Mutex::Locker l(lock);
+ return _find(key, &value, NULL);
}
template <class K, class V>
-void lru_map<K, V>::add(const K& key, V& value)
+bool lru_map<K, V>::find_and_update(const K& key, V *value, UpdateContext *ctx)
+{
+ Mutex::Locker l(lock);
+ return _find(key, value, ctx);
+}
+
+template <class K, class V>
+void lru_map<K, V>::_add(const K& key, V& value)
{
- lock.Lock();
typename std::map<K, entry>::iterator iter = entries.find(key);
if (iter != entries.end()) {
entry& e = iter->second;
@@ -74,8 +109,14 @@ void lru_map<K, V>::add(const K& key, V& value)
entries.erase(iter);
entries_lru.pop_back();
}
-
- lock.Unlock();
+}
+
+
+template <class K, class V>
+void lru_map<K, V>::add(const K& key, V& value)
+{
+ Mutex::Locker l(lock);
+ _add(key, value);
}
template <class K, class V>
diff --git a/src/global/signal_handler.cc b/src/global/signal_handler.cc
index ce604fe1e5d..ffdc5402caf 100644
--- a/src/global/signal_handler.cc
+++ b/src/global/signal_handler.cc
@@ -196,13 +196,13 @@ struct SignalHandler : public Thread {
lock.Lock();
int num_fds = 0;
fds[num_fds].fd = pipefd[0];
- fds[num_fds].events = POLLIN | POLLOUT | POLLERR;
+ fds[num_fds].events = POLLIN | POLLERR;
fds[num_fds].revents = 0;
++num_fds;
for (unsigned i=0; i<32; i++) {
if (handlers[i]) {
fds[num_fds].fd = handlers[i]->pipefd[0];
- fds[num_fds].events = POLLIN | POLLOUT | POLLERR;
+ fds[num_fds].events = POLLIN | POLLERR;
fds[num_fds].revents = 0;
++num_fds;
}
diff --git a/src/include/rados/librados.hpp b/src/include/rados/librados.hpp
index 3f6d025ff41..c8de9f9df33 100644
--- a/src/include/rados/librados.hpp
+++ b/src/include/rados/librados.hpp
@@ -789,7 +789,12 @@ namespace librados
int cluster_stat(cluster_stat_t& result);
int cluster_fsid(std::string *fsid);
- /* pool aio */
+ /*
+ * pool aio
+ *
+ * It is up to the caller to release the completion handler, even if the pool_create_async()
+ * and/or pool_delete_async() fails and does not send the async request
+ */
static PoolAsyncCompletion *pool_async_create_completion();
// -- aio --
diff --git a/src/librados/PoolAsyncCompletionImpl.h b/src/librados/PoolAsyncCompletionImpl.h
index efb89641466..443b2c23a17 100644
--- a/src/librados/PoolAsyncCompletionImpl.h
+++ b/src/librados/PoolAsyncCompletionImpl.h
@@ -94,6 +94,9 @@ namespace librados {
C_PoolAsync_Safe(PoolAsyncCompletionImpl *_c) : c(_c) {
c->get();
}
+ ~C_PoolAsync_Safe() {
+ c->put();
+ }
void finish(int r) {
c->lock.Lock();
@@ -109,7 +112,7 @@ namespace librados {
c->lock.Lock();
}
- c->put_unlock();
+ c->lock.Unlock();
}
};
}
diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc
index 9dc1229fbb9..0188d418e0d 100644
--- a/src/mds/MDCache.cc
+++ b/src/mds/MDCache.cc
@@ -632,7 +632,7 @@ void MDCache::populate_mydir()
CDir *dir = strays[i]->get_dirfrag(fg);
if (!dir)
dir = strays[i]->get_or_open_dirfrag(this, fg);
- if (!dir->is_complete()) {
+ if (dir->get_version() == 0) {
dir->fetch(new C_MDS_RetryOpenRoot(this));
return;
}
@@ -653,6 +653,8 @@ void MDCache::populate_mydir()
assert(!open);
open = true;
mds->queue_waiters(waiting_for_open);
+
+ scan_stray_dir();
}
void MDCache::open_foreign_mdsdir(inodeno_t ino, Context *fin)
@@ -9135,19 +9137,34 @@ void MDCache::_snaprealm_create_finish(MDRequest *mdr, Mutation *mut, CInode *in
// -------------------------------------------------------------------------------
// STRAYS
-void MDCache::scan_stray_dir()
+struct C_MDC_RetryScanStray : public Context {
+ MDCache *cache;
+ dirfrag_t next;
+ C_MDC_RetryScanStray(MDCache *c, dirfrag_t n) : cache(c), next(n) { }
+ void finish(int r) {
+ cache->scan_stray_dir(next);
+ }
+};
+
+void MDCache::scan_stray_dir(dirfrag_t next)
{
- dout(10) << "scan_stray_dir" << dendl;
-
+ dout(10) << "scan_stray_dir " << next << dendl;
+
list<CDir*> ls;
for (int i = 0; i < NUM_STRAY; ++i) {
- if (strays[i]) {
- strays[i]->get_dirfrags(ls);
- }
+ if (strays[i]->ino() < next.ino)
+ continue;
+ strays[i]->get_dirfrags(ls);
}
for (list<CDir*>::iterator p = ls.begin(); p != ls.end(); ++p) {
CDir *dir = *p;
+ if (dir->dirfrag() < next)
+ continue;
+ if (!dir->is_complete()) {
+ dir->fetch(new C_MDC_RetryScanStray(this, dir->dirfrag()));
+ return;
+ }
for (CDir::map_t::iterator q = dir->items.begin(); q != dir->items.end(); ++q) {
CDentry *dn = q->second;
CDentry::linkage_t *dnl = dn->get_projected_linkage();
@@ -9354,8 +9371,12 @@ void MDCache::purge_stray(CDentry *dn)
if (in->is_file()) {
uint64_t period = (uint64_t)in->inode.layout.fl_object_size *
(uint64_t)in->inode.layout.fl_stripe_count;
- uint64_t cur_max_size = in->inode.get_max_size();
- uint64_t to = MAX(in->inode.size, cur_max_size);
+ uint64_t to = in->inode.get_max_size();
+ to = MAX(in->inode.size, to);
+ // when truncating a file, the filer does not delete stripe objects that are
+ // truncated to zero. so we need to purge stripe objects up to the max size
+ // the file has ever been.
+ to = MAX(in->inode.max_size_ever, to);
if (to && period) {
uint64_t num = (to + period - 1) / period;
dout(10) << "purge_stray 0~" << to << " objects 0~" << num
diff --git a/src/mds/MDCache.h b/src/mds/MDCache.h
index d8f2a9486fb..416c6454292 100644
--- a/src/mds/MDCache.h
+++ b/src/mds/MDCache.h
@@ -870,7 +870,6 @@ public:
public:
elist<CDentry*> delayed_eval_stray;
- void scan_stray_dir();
void eval_stray(CDentry *dn, bool delay=false);
void eval_remote(CDentry *dn);
@@ -884,11 +883,13 @@ public:
eval_stray(dn, delay);
}
protected:
+ void scan_stray_dir(dirfrag_t next=dirfrag_t());
void fetch_backtrace(inodeno_t ino, int64_t pool, bufferlist& bl, Context *fin);
void purge_stray(CDentry *dn);
void _purge_stray_purged(CDentry *dn, int r=0);
void _purge_stray_logged(CDentry *dn, version_t pdv, LogSegment *ls);
void _purge_stray_logged_truncate(CDentry *dn, LogSegment *ls);
+ friend class C_MDC_RetryScanStray;
friend class C_MDC_FetchedBacktrace;
friend class C_MDC_PurgeStrayLogged;
friend class C_MDC_PurgeStrayLoggedTruncate;
diff --git a/src/mds/MDS.cc b/src/mds/MDS.cc
index c2e0bbbe369..83722274981 100644
--- a/src/mds/MDS.cc
+++ b/src/mds/MDS.cc
@@ -1525,7 +1525,6 @@ void MDS::active_start()
mdcache->open_root();
mdcache->clean_open_file_lists();
- mdcache->scan_stray_dir();
mdcache->export_remaining_imported_caps();
finish_contexts(g_ceph_context, waiting_for_replay); // kick waiters
finish_contexts(g_ceph_context, waiting_for_active); // kick waiters
diff --git a/src/mds/Server.cc b/src/mds/Server.cc
index 869f3773441..41862847e27 100644
--- a/src/mds/Server.cc
+++ b/src/mds/Server.cc
@@ -3086,6 +3086,7 @@ void Server::handle_client_file_readlock(MDRequest *mdr)
checking_lock.length = req->head.args.filelock_change.length;
checking_lock.client = req->get_orig_source().num();
checking_lock.pid = req->head.args.filelock_change.pid;
+ checking_lock.pid_namespace = req->head.args.filelock_change.pid_namespace;
checking_lock.type = req->head.args.filelock_change.type;
// get the appropriate lock state
diff --git a/src/mds/flock.h b/src/mds/flock.h
index ae93d1660f0..b767fe58507 100644
--- a/src/mds/flock.h
+++ b/src/mds/flock.h
@@ -12,7 +12,7 @@
inline ostream& operator<<(ostream& out, ceph_filelock& l) {
out << "start: " << l.start << ", length: " << l.length
<< ", client: " << l.client << ", pid: " << l.pid
- << ", type: " << (int)l.type
+ << ", pid_ns: " << l.pid_namespace << ", type: " << (int)l.type
<< std::endl;
return out;
}
diff --git a/src/mds/mdstypes.cc b/src/mds/mdstypes.cc
index 6886786f27e..362f74774c4 100644
--- a/src/mds/mdstypes.cc
+++ b/src/mds/mdstypes.cc
@@ -204,7 +204,7 @@ ostream& operator<<(ostream& out, const client_writeable_range_t& r)
*/
void inode_t::encode(bufferlist &bl) const
{
- ENCODE_START(7, 6, bl);
+ ENCODE_START(8, 6, bl);
::encode(ino, bl);
::encode(rdev, bl);
@@ -238,6 +238,7 @@ void inode_t::encode(bufferlist &bl) const
::encode(xattr_version, bl);
::encode(backtrace_version, bl);
::encode(old_pools, bl);
+ ::encode(max_size_ever, bl);
ENCODE_FINISH(bl);
}
@@ -294,6 +295,8 @@ void inode_t::decode(bufferlist::iterator &p)
::decode(backtrace_version, p);
if (struct_v >= 7)
::decode(old_pools, p);
+ if (struct_v >= 8)
+ ::decode(max_size_ever, p);
DECODE_FINISH(p);
}
diff --git a/src/mds/mdstypes.h b/src/mds/mdstypes.h
index 2a3874818b7..bd53c85b48d 100644
--- a/src/mds/mdstypes.h
+++ b/src/mds/mdstypes.h
@@ -329,6 +329,7 @@ struct inode_t {
ceph_file_layout layout;
vector <int64_t> old_pools;
uint64_t size; // on directory, # dentries
+ uint64_t max_size_ever; // max size the file has ever been
uint32_t truncate_seq;
uint64_t truncate_size, truncate_from;
uint32_t truncate_pending;
@@ -353,7 +354,8 @@ struct inode_t {
inode_t() : ino(0), rdev(0),
mode(0), uid(0), gid(0),
nlink(0), anchored(false),
- size(0), truncate_seq(0), truncate_size(0), truncate_from(0),
+ size(0), max_size_ever(0),
+ truncate_seq(0), truncate_size(0), truncate_from(0),
truncate_pending(0),
time_warp_seq(0),
version(0), file_data_version(0), xattr_version(0), backtrace_version(0) {
@@ -369,6 +371,8 @@ struct inode_t {
bool is_truncating() const { return (truncate_pending > 0); }
void truncate(uint64_t old_size, uint64_t new_size) {
assert(new_size < old_size);
+ if (old_size > max_size_ever)
+ max_size_ever = old_size;
truncate_from = old_size;
size = new_size;
rstat.rbytes = new_size;
diff --git a/src/mon/MDSMonitor.cc b/src/mon/MDSMonitor.cc
index 48c1c99d584..b865c379d1a 100644
--- a/src/mon/MDSMonitor.cc
+++ b/src/mon/MDSMonitor.cc
@@ -951,21 +951,44 @@ bool MDSMonitor::prepare_command(MMonCommand *m)
}
}
} else if (prefix == "mds add_data_pool") {
- int64_t poolid;
- cmd_getval(g_ceph_context, cmdmap, "poolid", poolid);
- pending_mdsmap.add_data_pool(poolid);
- ss << "added data pool " << poolid << " to mdsmap";
- r = 0;
-
- } else if (prefix == "mds remove_data_pool") {
- int64_t poolid;
- cmd_getval(g_ceph_context, cmdmap, "poolid", poolid);
- r = pending_mdsmap.remove_data_pool(poolid);
- if (r == -ENOENT)
+ string poolname;
+ cmd_getval(g_ceph_context, cmdmap, "pool", poolname);
+ int64_t poolid = mon->osdmon()->osdmap.lookup_pg_pool_name(poolname);
+ if (poolid < 0) {
+ string err;
+ poolid = strict_strtol(poolname.c_str(), 10, &err);
+ if (err.length()) {
+ r = -ENOENT;
+ poolid = -1;
+ ss << "pool '" << poolname << "' does not exist";
+ }
+ }
+ if (poolid >= 0) {
+ pending_mdsmap.add_data_pool(poolid);
+ ss << "added data pool " << poolid << " to mdsmap";
r = 0;
- if (r == 0)
- ss << "removed data pool " << poolid << " from mdsmap";
-
+ }
+ } else if (prefix == "mds remove_data_pool") {
+ string poolname;
+ cmd_getval(g_ceph_context, cmdmap, "pool", poolname);
+ int64_t poolid = mon->osdmon()->osdmap.lookup_pg_pool_name(poolname);
+ if (poolid < 0) {
+ string err;
+ poolid = strict_strtol(poolname.c_str(), 10, &err);
+ if (err.length()) {
+ r = -ENOENT;
+ poolid = -1;
+ ss << "pool '" << poolname << "' does not exist";
+ }
+ }
+ if (poolid >= 0) {
+ cmd_getval(g_ceph_context, cmdmap, "poolid", poolid);
+ r = pending_mdsmap.remove_data_pool(poolid);
+ if (r == -ENOENT)
+ r = 0;
+ if (r == 0)
+ ss << "removed data pool " << poolid << " from mdsmap";
+ }
} else if (prefix == "mds newfs") {
MDSMap newmap;
int64_t metadata, data;
diff --git a/src/mon/MonCommands.h b/src/mon/MonCommands.h
index 33e00a98d30..ae6bffe0d7d 100644
--- a/src/mon/MonCommands.h
+++ b/src/mon/MonCommands.h
@@ -284,11 +284,11 @@ COMMAND("mds unset " \
"name=sure,type=CephString,req=false", \
"unset <key>", "mds", "w", "cli,rest")
COMMAND("mds add_data_pool " \
- "name=poolid,type=CephInt,range=0", \
- "add data pool <poolid>", "mds", "rw", "cli,rest")
+ "name=pool,type=CephString", \
+ "add data pool <pool>", "mds", "rw", "cli,rest")
COMMAND("mds remove_data_pool " \
- "name=poolid,type=CephInt,range=0", \
- "remove data pool <poolid>", "mds", "rw", "cli,rest")
+ "name=pool,type=CephString", \
+ "remove data pool <pool>", "mds", "rw", "cli,rest")
COMMAND("mds newfs " \
"name=metadata,type=CephInt,range=0 " \
"name=data,type=CephInt,range=0 " \
@@ -507,8 +507,8 @@ COMMAND("osd pool get " \
"get pool parameter <var>", "osd", "r", "cli,rest")
COMMAND("osd pool set " \
"name=pool,type=CephPoolname " \
- "name=var,type=CephChoices,strings=size|min_size|crash_replay_interval|pg_num|pgp_num|crush_ruleset " \
- "name=val,type=CephInt", \
+ "name=var,type=CephChoices,strings=size|min_size|crash_replay_interval|pg_num|pgp_num|crush_ruleset|hashpspool " \
+ "name=val,type=CephString", \
"set pool parameter <var> to <val>", "osd", "rw", "cli,rest")
// 'val' is a CephString because it can include a unit. Perhaps
// there should be a Python type for validation/conversion of strings
diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc
index 9144736d801..425375b29e2 100644
--- a/src/mon/OSDMonitor.cc
+++ b/src/mon/OSDMonitor.cc
@@ -2618,6 +2618,125 @@ void OSDMonitor::parse_loc_map(const vector<string>& args, map<string,string> *
}
}
+int OSDMonitor::prepare_command_pool_set(map<string,cmd_vartype> &cmdmap,
+ stringstream& ss)
+{
+ string poolstr;
+ cmd_getval(g_ceph_context, cmdmap, "pool", poolstr);
+ int64_t pool = osdmap.lookup_pg_pool_name(poolstr.c_str());
+ if (pool < 0) {
+ ss << "unrecognized pool '" << poolstr << "'";
+ return -ENOENT;
+ }
+ string var;
+ cmd_getval(g_ceph_context, cmdmap, "var", var);
+
+ pg_pool_t p = *osdmap.get_pg_pool(pool);
+ if (pending_inc.new_pools.count(pool))
+ p = pending_inc.new_pools[pool];
+
+ // accept val as a json string or int, and parse out int or float
+ // values from the string as needed
+ string val;
+ cmd_getval(g_ceph_context, cmdmap, "val", val);
+ string interr;
+ int64_t n = 0;
+ if (!cmd_getval(g_ceph_context, cmdmap, "val", n))
+ n = strict_strtoll(val.c_str(), 10, &interr);
+ string floaterr;
+ float f;
+ if (!cmd_getval(g_ceph_context, cmdmap, "val", f))
+ f = strict_strtod(val.c_str(), &floaterr);
+
+ if (var == "size") {
+ if (interr.length()) {
+ ss << "error parsing integer value '" << val << "': " << interr;
+ return -EINVAL;
+ }
+ if (n == 0 || n > 10) {
+ ss << "pool size must be between 1 and 10";
+ return -EINVAL;
+ }
+ p.size = n;
+ if (n < p.min_size)
+ p.min_size = n;
+ ss << "set pool " << pool << " size to " << n;
+ } else if (var == "min_size") {
+ if (interr.length()) {
+ ss << "error parsing integer value '" << val << "': " << interr;
+ return -EINVAL;
+ }
+ p.min_size = n;
+ ss << "set pool " << pool << " min_size to " << n;
+ } else if (var == "crash_replay_interval") {
+ if (interr.length()) {
+ ss << "error parsing integer value '" << val << "': " << interr;
+ return -EINVAL;
+ }
+ p.crash_replay_interval = n;
+    ss << "set pool " << pool << " crash_replay_interval to " << n;
+ } else if (var == "pg_num") {
+ if (interr.length()) {
+ ss << "error parsing integer value '" << val << "': " << interr;
+ return -EINVAL;
+ }
+ if (n <= (int)p.get_pg_num()) {
+ ss << "specified pg_num " << n << " <= current " << p.get_pg_num();
+ } else if (!mon->pgmon()->pg_map.creating_pgs.empty()) {
+ ss << "currently creating pgs, wait";
+ return -EAGAIN;
+ } else {
+ p.set_pg_num(n);
+ ss << "set pool " << pool << " pg_num to " << n;
+ }
+ } else if (var == "pgp_num") {
+ if (interr.length()) {
+ ss << "error parsing integer value '" << val << "': " << interr;
+ return -EINVAL;
+ }
+ if (n > (int)p.get_pg_num()) {
+ ss << "specified pgp_num " << n << " > pg_num " << p.get_pg_num();
+ } else if (!mon->pgmon()->pg_map.creating_pgs.empty()) {
+ ss << "still creating pgs, wait";
+ return -EAGAIN;
+ } else {
+ p.set_pgp_num(n);
+ ss << "set pool " << pool << " pgp_num to " << n;
+ }
+ } else if (var == "crush_ruleset") {
+ if (interr.length()) {
+ ss << "error parsing integer value '" << val << "': " << interr;
+ return -EINVAL;
+ }
+ if (osdmap.crush->rule_exists(n)) {
+ p.crush_ruleset = n;
+ ss << "set pool " << pool << " crush_ruleset to " << n;
+ } else {
+ ss << "crush ruleset " << n << " does not exist";
+ return -ENOENT;
+ }
+ } else if (var == "hashpspool") {
+ if (val == "true") {
+ p.flags |= pg_pool_t::FLAG_HASHPSPOOL;
+ ss << "set";
+ } else if (val == "false") {
+      p.flags &= ~pg_pool_t::FLAG_HASHPSPOOL;
+ ss << "unset";
+ } else {
+ ss << "expecting value true or false";
+ return -EINVAL;
+ }
+ ss << " pool " << pool << " flag hashpspool";
+ } else {
+ ss << "unrecognized variable '" << var << "'";
+ return -EINVAL;
+ }
+
+ p.last_change = pending_inc.epoch;
+ pending_inc.new_pools[pool] = p;
+ return 0;
+}
+
bool OSDMonitor::prepare_command(MMonCommand *m)
{
bool ret = false;
@@ -3586,73 +3705,13 @@ done:
return true;
}
} else if (prefix == "osd pool set") {
- // set a pool variable to a positive int
- string poolstr;
- cmd_getval(g_ceph_context, cmdmap, "pool", poolstr);
- int64_t pool = osdmap.lookup_pg_pool_name(poolstr.c_str());
- if (pool < 0) {
- ss << "unrecognized pool '" << poolstr << "'";
- err = -ENOENT;
- } else {
- const pg_pool_t *p = osdmap.get_pg_pool(pool);
- int64_t n;
- cmd_getval(g_ceph_context, cmdmap, "val", n);
- string var;
- cmd_getval(g_ceph_context, cmdmap, "var", var);
- if (var == "size") {
- if (n == 0 || n > 10) {
- ss << "pool size must be between 1 and 10";
- err = -EINVAL;
- goto reply;
- }
- pending_inc.get_new_pool(pool, p)->size = n;
- if (n < p->min_size)
- pending_inc.get_new_pool(pool, p)->min_size = n;
- ss << "set pool " << pool << " size to " << n;
- } else if (var == "min_size") {
- pending_inc.get_new_pool(pool, p)->min_size = n;
- ss << "set pool " << pool << " min_size to " << n;
- } else if (var == "crash_replay_interval") {
- pending_inc.get_new_pool(pool, p)->crash_replay_interval = n;
- ss << "set pool " << pool << " to crash_replay_interval to " << n;
- } else if (var == "pg_num") {
- if (n <= p->get_pg_num()) {
- ss << "specified pg_num " << n << " <= current " << p->get_pg_num();
- err = -EINVAL;
- } else if (!mon->pgmon()->pg_map.creating_pgs.empty()) {
- ss << "busy creating pgs; try again later";
- err = -EAGAIN;
- } else {
- pending_inc.get_new_pool(pool, p)->set_pg_num(n);
- ss << "set pool " << pool << " pg_num to " << n;
- }
- } else if (var == "pgp_num") {
- if (n > p->get_pg_num()) {
- ss << "specified pgp_num " << n << " > pg_num " << p->get_pg_num();
- } else if (!mon->pgmon()->pg_map.creating_pgs.empty()) {
- ss << "busy creating pgs; try again later";
- err = -EAGAIN;
- } else {
- pending_inc.get_new_pool(pool, p)->set_pgp_num(n);
- ss << "set pool " << pool << " pgp_num to " << n;
- }
- } else if (var == "crush_ruleset") {
- if (osdmap.crush->rule_exists(n)) {
- pending_inc.get_new_pool(pool, p)->crush_ruleset = n;
- ss << "set pool " << pool << " crush_ruleset to " << n;
- } else {
- ss << "crush ruleset " << n << " does not exist";
- err = -ENOENT;
- }
- } else {
- err = -EINVAL;
- goto reply;
- }
- pending_inc.get_new_pool(pool, p)->last_change = pending_inc.epoch;
- getline(ss, rs);
- wait_for_finished_proposal(new Monitor::C_Command(mon, m, 0, rs, get_last_committed()));
- return true;
- }
+ err = prepare_command_pool_set(cmdmap, ss);
+ if (err < 0)
+ goto reply;
+
+ getline(ss, rs);
+ wait_for_finished_proposal(new Monitor::C_Command(mon, m, 0, rs, get_last_committed()));
+ return true;
} else if (prefix == "osd tier add") {
string poolstr;
cmd_getval(g_ceph_context, cmdmap, "pool", poolstr);
diff --git a/src/mon/OSDMonitor.h b/src/mon/OSDMonitor.h
index 304f9c4f609..439c8435055 100644
--- a/src/mon/OSDMonitor.h
+++ b/src/mon/OSDMonitor.h
@@ -324,6 +324,9 @@ private:
bool preprocess_command(MMonCommand *m);
bool prepare_command(MMonCommand *m);
+ int prepare_command_pool_set(map<string,cmd_vartype> &cmdmap,
+ stringstream& ss);
+
void handle_osd_timeouts(const utime_t &now,
std::map<int,utime_t> &last_osd_report);
void mark_all_down();
diff --git a/src/os/FileStore.cc b/src/os/FileStore.cc
index 3506c4a4ccd..6940dff1405 100644
--- a/src/os/FileStore.cc
+++ b/src/os/FileStore.cc
@@ -201,7 +201,9 @@ int FileStore::lfn_open(coll_t cid,
IndexedPath *path,
Index *index)
{
- assert(get_allow_sharded_objects() || oid.shard_id == ghobject_t::NO_SHARD);
+ assert(get_allow_sharded_objects() ||
+ ( oid.shard_id == ghobject_t::NO_SHARD &&
+ oid.generation == ghobject_t::NO_GEN ));
assert(outfd);
int flags = O_RDWR;
if (create)
@@ -2585,8 +2587,10 @@ int FileStore::fiemap(coll_t cid, const ghobject_t& oid,
if (r < 0)
goto done;
- if (fiemap->fm_mapped_extents == 0)
+ if (fiemap->fm_mapped_extents == 0) {
+ free(fiemap);
goto done;
+ }
struct fiemap_extent *extent = &fiemap->fm_extents[0];
@@ -2620,6 +2624,7 @@ int FileStore::fiemap(coll_t cid, const ghobject_t& oid,
i++;
extent++;
}
+ free(fiemap);
}
done:
@@ -2629,7 +2634,6 @@ done:
}
dout(10) << "fiemap " << cid << "/" << oid << " " << offset << "~" << len << " = " << r << " num_extents=" << exomap.size() << " " << exomap << dendl;
- free(fiemap);
assert(!m_filestore_fail_eio || r != -EIO);
return r;
}
diff --git a/src/os/GenericFileStoreBackend.cc b/src/os/GenericFileStoreBackend.cc
index 81d896a0943..f19ba7d7760 100644
--- a/src/os/GenericFileStoreBackend.cc
+++ b/src/os/GenericFileStoreBackend.cc
@@ -124,12 +124,12 @@ int GenericFileStoreBackend::detect_features()
dout(0) << "detect_features: FIEMAP ioctl is supported and appears to work" << dendl;
ioctl_fiemap = true;
}
+ free(fiemap);
}
if (!m_filestore_fiemap) {
dout(0) << "detect_features: FIEMAP ioctl is disabled via 'filestore fiemap' config option" << dendl;
ioctl_fiemap = false;
}
- free(fiemap);
::unlink(fn);
TEMP_FAILURE_RETRY(::close(fd));
diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc
index b2aa2ebbcd2..d450fd543e6 100644
--- a/src/osd/OSD.cc
+++ b/src/osd/OSD.cc
@@ -3278,13 +3278,15 @@ bool remove_dir(
ObjectStore *store, SnapMapper *mapper,
OSDriver *osdriver,
ObjectStore::Sequencer *osr,
- coll_t coll, DeletingStateRef dstate)
+ coll_t coll, DeletingStateRef dstate,
+ ThreadPool::TPHandle &handle)
{
vector<ghobject_t> olist;
int64_t num = 0;
ObjectStore::Transaction *t = new ObjectStore::Transaction;
ghobject_t next;
while (!next.is_max()) {
+ handle.reset_tp_timeout();
store->collection_list_partial(
coll,
next,
@@ -3306,7 +3308,9 @@ bool remove_dir(
C_SaferCond waiter;
store->queue_transaction(osr, t, &waiter);
bool cont = dstate->pause_clearing();
+ handle.suspend_tp_timeout();
waiter.wait();
+ handle.reset_tp_timeout();
if (cont)
cont = dstate->resume_clearing();
delete t;
@@ -3322,14 +3326,18 @@ bool remove_dir(
C_SaferCond waiter;
store->queue_transaction(osr, t, &waiter);
bool cont = dstate->pause_clearing();
+ handle.suspend_tp_timeout();
waiter.wait();
+ handle.reset_tp_timeout();
if (cont)
cont = dstate->resume_clearing();
delete t;
return cont;
}
-void OSD::RemoveWQ::_process(pair<PGRef, DeletingStateRef> item)
+void OSD::RemoveWQ::_process(
+ pair<PGRef, DeletingStateRef> item,
+ ThreadPool::TPHandle &handle)
{
PGRef pg(item.first);
SnapMapper &mapper = pg->snap_mapper;
@@ -3346,7 +3354,8 @@ void OSD::RemoveWQ::_process(pair<PGRef, DeletingStateRef> item)
i != colls_to_remove.end();
++i) {
bool cont = remove_dir(
- pg->cct, store, &mapper, &driver, pg->osr.get(), *i, item.second);
+ pg->cct, store, &mapper, &driver, pg->osr.get(), *i, item.second,
+ handle);
if (!cont)
return;
}
diff --git a/src/osd/OSD.h b/src/osd/OSD.h
index 9346cee6890..f7559da3be5 100644
--- a/src/osd/OSD.h
+++ b/src/osd/OSD.h
@@ -1681,7 +1681,7 @@ protected:
remove_queue.pop_front();
return item;
}
- void _process(pair<PGRef, DeletingStateRef>);
+ void _process(pair<PGRef, DeletingStateRef>, ThreadPool::TPHandle &);
void _clear() {
remove_queue.clear();
}
diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc
index 27f7b171677..1a9dde665cf 100644
--- a/src/osd/osd_types.cc
+++ b/src/osd/osd_types.cc
@@ -655,6 +655,7 @@ void pool_snap_info_t::generate_test_instances(list<pool_snap_info_t*>& o)
void pg_pool_t::dump(Formatter *f) const
{
f->dump_unsigned("flags", get_flags());
+ f->dump_string("flags_names", get_flags_string());
f->dump_int("type", get_type());
f->dump_int("size", get_size());
f->dump_int("min_size", get_min_size());
@@ -1054,7 +1055,7 @@ ostream& operator<<(ostream& out, const pg_pool_t& p)
<< " last_change " << p.get_last_change()
<< " owner " << p.get_auid();
if (p.flags)
- out << " flags " << p.flags;
+ out << " flags " << p.get_flags_string();
if (p.crash_replay_interval)
out << " crash_replay_interval " << p.crash_replay_interval;
if (p.quota_max_bytes)
diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h
index a54fc65f375..8ceeb539c1a 100644
--- a/src/osd/osd_types.h
+++ b/src/osd/osd_types.h
@@ -725,6 +725,28 @@ struct pg_pool_t {
FLAG_FULL = 2, // pool is full
};
+ static const char *get_flag_name(int f) {
+ switch (f) {
+ case FLAG_HASHPSPOOL: return "hashpspool";
+ case FLAG_FULL: return "full";
+ default: return "???";
+ }
+ }
+ static string get_flags_string(uint64_t f) {
+ string s;
+ for (unsigned n=0; f && n<64; ++n) {
+ if (f & (1ull << n)) {
+ if (s.length())
+ s += ",";
+ s += get_flag_name(1ull << n);
+ }
+ }
+ return s;
+ }
+ string get_flags_string() const {
+ return get_flags_string(flags);
+ }
+
typedef enum {
CACHEMODE_NONE = 0, ///< no caching
CACHEMODE_WRITEBACK = 1, ///< write to cache, flush later
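
For reference, the helper added above turns the pool flag bitmask into a comma-separated list of names. Below is a small self-contained sketch of the same bit-walk using plain functions instead of pg_pool_t (the flag values mirror FLAG_HASHPSPOOL/FLAG_FULL from the hunk); the only difference is that it clears each bit it has named so the loop can stop early.

    #include <cstdint>
    #include <iostream>
    #include <string>

    // Map each set bit to a name, as in pg_pool_t::get_flag_name().
    static const char *flag_name(uint64_t f) {
      switch (f) {
      case 1: return "hashpspool";
      case 2: return "full";
      default: return "???";
      }
    }

    // Join the names of all set bits with commas, as in get_flags_string().
    static std::string flags_string(uint64_t f) {
      std::string s;
      for (unsigned n = 0; f && n < 64; ++n) {
        uint64_t bit = 1ull << n;
        if (f & bit) {
          if (!s.empty())
            s += ",";
          s += flag_name(bit);
          f &= ~bit;          // clear it so the loop can stop once f == 0
        }
      }
      return s;
    }

    int main() {
      std::cout << flags_string(3) << "\n";   // prints "hashpspool,full"
      return 0;
    }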
diff --git a/src/osdc/Objecter.h b/src/osdc/Objecter.h
index 1196633276d..938c97a4f31 100644
--- a/src/osdc/Objecter.h
+++ b/src/osdc/Objecter.h
@@ -386,7 +386,6 @@ struct ObjectOperation {
pwatchers->push_back(ow);
}
}
- *prval = 0;
}
catch (buffer::error& e) {
if (prval)
@@ -424,8 +423,6 @@ struct ObjectOperation {
}
psnaps->seq = resp.seq;
}
- if (prval)
- *prval = 0;
}
catch (buffer::error& e) {
if (prval)
@@ -617,10 +614,9 @@ struct ObjectOperation {
}
::decode(*cursor, p);
} catch (buffer::error& e) {
- r = -EIO;
+ if (prval)
+ *prval = -EIO;
}
- if (prval)
- *prval = r;
}
};
@@ -664,10 +660,9 @@ struct ObjectOperation {
if (pisdirty)
*pisdirty = isdirty;
} catch (buffer::error& e) {
- r = -EIO;
+ if (prval)
+ *prval = -EIO;
}
- if (prval)
- *prval = r;
}
};
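
The Objecter hunks above change the decode callbacks to leave the caller's return-value slot alone on success and to write -EIO into it only when decoding throws. A minimal stand-alone illustration of that "set the out-parameter only on the error path" pattern, using std::stol in place of buffer decoding:

    #include <cerrno>
    #include <iostream>
    #include <stdexcept>
    #include <string>

    // On success leave *prval untouched (the caller may already have recorded
    // the op's result there); on failure report -EIO through it.
    void parse_or_flag_error(const std::string &input, int *prval, long &out) {
      try {
        out = std::stol(input);          // may throw, like buffer decoding
      } catch (const std::exception &) {
        if (prval)
          *prval = -EIO;                 // only touched on the error path
      }
    }

    int main() {
      int rval = 0;
      long v = 0;
      parse_or_flag_error("42", &rval, v);
      std::cout << "v=" << v << " rval=" << rval << "\n";     // v=42 rval=0
      parse_or_flag_error("not-a-number", &rval, v);
      std::cout << "rval after bad input=" << rval << "\n";   // rval=-EIO
      return 0;
    }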
diff --git a/src/rgw/Makefile.am b/src/rgw/Makefile.am
index 24060b52e25..b92c35e08d6 100644
--- a/src/rgw/Makefile.am
+++ b/src/rgw/Makefile.am
@@ -31,7 +31,8 @@ librgw_la_SOURCES = \
rgw/rgw_auth_s3.cc \
rgw/rgw_metadata.cc \
rgw/rgw_replica_log.cc \
- rgw/rgw_keystone.cc
+ rgw/rgw_keystone.cc \
+ rgw/rgw_quota.cc
librgw_la_CXXFLAGS = -Woverloaded-virtual ${AM_CXXFLAGS}
noinst_LTLIBRARIES += librgw.la
@@ -124,6 +125,7 @@ noinst_HEADERS += \
rgw/rgw_http_client.h \
rgw/rgw_swift.h \
rgw/rgw_swift_auth.h \
+ rgw/rgw_quota.h \
rgw/rgw_rados.h \
rgw/rgw_replica_log.h \
rgw/rgw_resolve.h \
diff --git a/src/rgw/rgw_admin.cc b/src/rgw/rgw_admin.cc
index 81abb231b6f..b23bf3ba5d4 100644
--- a/src/rgw/rgw_admin.cc
+++ b/src/rgw/rgw_admin.cc
@@ -62,6 +62,9 @@ void _usage()
cerr << " bucket check check bucket index\n";
cerr << " object rm remove object\n";
cerr << " object unlink unlink object from bucket index\n";
+ cerr << " quota set set quota params\n";
+ cerr << " quota enable enable quota\n";
+ cerr << " quota disable disable quota\n";
cerr << " region get show region info\n";
cerr << " regions list list all regions set on this cluster\n";
cerr << " region set set region info (requires infile)\n";
@@ -154,6 +157,11 @@ void _usage()
cerr << " --yes-i-really-mean-it required for certain operations\n";
cerr << "\n";
cerr << "<date> := \"YYYY-MM-DD[ hh:mm:ss]\"\n";
+ cerr << "\nQuota options:\n";
+ cerr << " --bucket specified bucket for quota command\n";
+ cerr << " --max-objects specify max objects\n";
+ cerr << " --max-size specify max size (in bytes)\n";
+ cerr << " --quota-scope scope of quota (bucket, user)\n";
cerr << "\n";
generic_client_usage();
}
@@ -203,6 +211,9 @@ enum {
OPT_OBJECT_RM,
OPT_OBJECT_UNLINK,
OPT_OBJECT_STAT,
+ OPT_QUOTA_SET,
+ OPT_QUOTA_ENABLE,
+ OPT_QUOTA_DISABLE,
OPT_GC_LIST,
OPT_GC_PROCESS,
OPT_REGION_GET,
@@ -253,6 +264,7 @@ static int get_cmd(const char *cmd, const char *prev_cmd, bool *need_more)
strcmp(cmd, "opstate") == 0 ||
strcmp(cmd, "pool") == 0 ||
strcmp(cmd, "pools") == 0 ||
+ strcmp(cmd, "quota") == 0 ||
strcmp(cmd, "region") == 0 ||
strcmp(cmd, "regions") == 0 ||
strcmp(cmd, "region-map") == 0 ||
@@ -362,6 +374,13 @@ static int get_cmd(const char *cmd, const char *prev_cmd, bool *need_more)
return OPT_REGION_SET;
if (strcmp(cmd, "default") == 0)
return OPT_REGION_DEFAULT;
+ } else if (strcmp(prev_cmd, "quota") == 0) {
+ if (strcmp(cmd, "set") == 0)
+ return OPT_QUOTA_SET;
+ if (strcmp(cmd, "enable") == 0)
+ return OPT_QUOTA_ENABLE;
+ if (strcmp(cmd, "disable") == 0)
+ return OPT_QUOTA_DISABLE;
} else if (strcmp(prev_cmd, "regions") == 0) {
if (strcmp(cmd, "list") == 0)
return OPT_REGION_LIST;
@@ -660,6 +679,64 @@ static bool dump_string(const char *field_name, bufferlist& bl, Formatter *f)
return true;
}
+void set_quota_info(RGWQuotaInfo& quota, int opt_cmd, int64_t max_size, int64_t max_objects)
+{
+ switch (opt_cmd) {
+ case OPT_QUOTA_ENABLE:
+ quota.enabled = true;
+
+ // falling through on purpose
+
+ case OPT_QUOTA_SET:
+ if (max_objects >= 0) {
+ quota.max_objects = max_objects;
+ }
+ if (max_size >= 0) {
+ quota.max_size_kb = rgw_rounded_kb(max_size);
+ }
+ break;
+ case OPT_QUOTA_DISABLE:
+ quota.enabled = false;
+ break;
+ }
+}
+
+int set_bucket_quota(RGWRados *store, int opt_cmd, string& bucket_name, int64_t max_size, int64_t max_objects)
+{
+ RGWBucketInfo bucket_info;
+ map<string, bufferlist> attrs;
+ int r = store->get_bucket_info(NULL, bucket_name, bucket_info, NULL, &attrs);
+ if (r < 0) {
+ cerr << "could not get bucket info for bucket=" << bucket_name << ": " << cpp_strerror(-r) << std::endl;
+ return -r;
+ }
+
+ set_quota_info(bucket_info.quota, opt_cmd, max_size, max_objects);
+
+ r = store->put_bucket_instance_info(bucket_info, false, 0, &attrs);
+ if (r < 0) {
+ cerr << "ERROR: failed writing bucket instance info: " << cpp_strerror(-r) << std::endl;
+ return -r;
+ }
+ return 0;
+}
+
+int set_user_bucket_quota(int opt_cmd, RGWUser& user, RGWUserAdminOpState& op_state, int64_t max_size, int64_t max_objects)
+{
+ RGWUserInfo& user_info = op_state.get_user_info();
+
+ set_quota_info(user_info.bucket_quota, opt_cmd, max_size, max_objects);
+
+ op_state.set_bucket_quota(user_info.bucket_quota);
+
+ string err;
+ int r = user.modify(op_state, &err);
+ if (r < 0) {
+ cerr << "ERROR: failed updating user info: " << cpp_strerror(-r) << ": " << err << std::endl;
+ return -r;
+ }
+ return 0;
+}
int main(int argc, char **argv)
{
@@ -721,6 +798,10 @@ int main(int argc, char **argv)
string replica_log_type_str;
ReplicaLogType replica_log_type = ReplicaLog_Invalid;
string op_mask_str;
+ string quota_scope;
+
+ int64_t max_objects = -1;
+ int64_t max_size = -1;
std::string val;
std::ostringstream errs;
@@ -788,6 +869,10 @@ int main(int argc, char **argv)
max_buckets = atoi(val.c_str());
} else if (ceph_argparse_witharg(args, i, &val, "--max-entries", (char*)NULL)) {
max_entries = atoi(val.c_str());
+ } else if (ceph_argparse_witharg(args, i, &val, "--max-size", (char*)NULL)) {
+ max_size = (int64_t)atoll(val.c_str());
+ } else if (ceph_argparse_witharg(args, i, &val, "--max-objects", (char*)NULL)) {
+ max_objects = (int64_t)atoll(val.c_str());
} else if (ceph_argparse_witharg(args, i, &val, "--date", "--time", (char*)NULL)) {
date = val;
if (end_date.empty())
@@ -848,6 +933,8 @@ int main(int argc, char **argv)
start_marker = val;
} else if (ceph_argparse_witharg(args, i, &val, "--end-marker", (char*)NULL)) {
end_marker = val;
+ } else if (ceph_argparse_witharg(args, i, &val, "--quota-scope", (char*)NULL)) {
+ quota_scope = val;
} else if (ceph_argparse_witharg(args, i, &val, "--replica-log-type", (char*)NULL)) {
replica_log_type_str = val;
replica_log_type = get_replicalog_type(replica_log_type_str);
@@ -2228,5 +2315,28 @@ next:
return -ret;
}
}
+
+ bool quota_op = (opt_cmd == OPT_QUOTA_SET || opt_cmd == OPT_QUOTA_ENABLE || opt_cmd == OPT_QUOTA_DISABLE);
+
+ if (quota_op) {
+ if (bucket_name.empty() && user_id.empty()) {
+ cerr << "ERROR: bucket name or uid is required for quota operation" << std::endl;
+ return EINVAL;
+ }
+
+ if (!bucket_name.empty()) {
+ if (!quota_scope.empty() && quota_scope != "bucket") {
+ cerr << "ERROR: invalid quota scope specification." << std::endl;
+ return EINVAL;
+ }
+ set_bucket_quota(store, opt_cmd, bucket_name, max_size, max_objects);
+ } else if (!user_id.empty()) {
+ if (quota_scope != "bucket") {
+ cerr << "ERROR: only bucket-level user quota can be handled. Please specify --quota-scope=bucket" << std::endl;
+ return EINVAL;
+ }
+ set_user_bucket_quota(opt_cmd, user, user_op, max_size, max_objects);
+ }
+ }
return 0;
}
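
set_quota_info() above intentionally falls through from the enable case into the set case, so `quota enable` can apply --max-size/--max-objects in the same call while `quota disable` only clears the flag. The compact sketch that follows is hypothetical code: it takes the size already in KiB (the real tool converts bytes with rgw_rounded_kb) and uses an explicit [[fallthrough]] marker where the original relies on a comment.

    #include <cstdint>
    #include <iostream>

    struct Quota { bool enabled = false; int64_t max_objects = -1, max_size_kb = -1; };
    enum Cmd { QUOTA_SET, QUOTA_ENABLE, QUOTA_DISABLE };

    // ENABLE turns the quota on and then applies any limits that were supplied,
    // SET applies limits only, DISABLE clears the flag and keeps the stored limits.
    void apply(Quota &q, Cmd cmd, int64_t max_size_kb, int64_t max_objects) {
      switch (cmd) {
      case QUOTA_ENABLE:
        q.enabled = true;
        [[fallthrough]];
      case QUOTA_SET:
        if (max_objects >= 0) q.max_objects = max_objects;
        if (max_size_kb >= 0) q.max_size_kb = max_size_kb;
        break;
      case QUOTA_DISABLE:
        q.enabled = false;
        break;
      }
    }

    int main() {
      Quota q;
      apply(q, QUOTA_ENABLE, 1024, 100);   // enable and set both limits
      std::cout << q.enabled << " " << q.max_size_kb << " " << q.max_objects << "\n";
      apply(q, QUOTA_DISABLE, -1, -1);     // limits retained, enforcement off
      std::cout << q.enabled << " " << q.max_size_kb << " " << q.max_objects << "\n";
      return 0;
    }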
diff --git a/src/rgw/rgw_bucket.cc b/src/rgw/rgw_bucket.cc
index 5356417f09a..3267bc51948 100644
--- a/src/rgw/rgw_bucket.cc
+++ b/src/rgw/rgw_bucket.cc
@@ -901,6 +901,7 @@ static int bucket_stats(RGWRados *store, std::string& bucket_name, Formatter *f
formatter->dump_int("mtime", mtime);
formatter->dump_string("max_marker", max_marker);
dump_bucket_usage(stats, formatter);
+ encode_json("bucket_quota", bucket_info.quota, formatter);
formatter->close_section();
return 0;
diff --git a/src/rgw/rgw_common.h b/src/rgw/rgw_common.h
index 2c7c0c716be..baf60001a8b 100644
--- a/src/rgw/rgw_common.h
+++ b/src/rgw/rgw_common.h
@@ -29,6 +29,7 @@
#include "include/utime.h"
#include "rgw_acl.h"
#include "rgw_cors.h"
+#include "rgw_quota.h"
#include "cls/version/cls_version_types.h"
#include "include/rados/librados.hpp"
@@ -90,6 +91,7 @@ using ceph::crypto::MD5;
#define RGW_OP_TYPE_WRITE 0x02
#define RGW_OP_TYPE_DELETE 0x04
+#define RGW_OP_TYPE_MODIFY (RGW_OP_TYPE_WRITE | RGW_OP_TYPE_DELETE)
#define RGW_OP_TYPE_ALL (RGW_OP_TYPE_READ | RGW_OP_TYPE_WRITE | RGW_OP_TYPE_DELETE)
#define RGW_DEFAULT_MAX_BUCKETS 1000
@@ -128,6 +130,7 @@ using ceph::crypto::MD5;
#define ERR_NOT_FOUND 2023
#define ERR_PERMANENT_REDIRECT 2024
#define ERR_LOCKED 2025
+#define ERR_QUOTA_EXCEEDED 2026
#define ERR_USER_SUSPENDED 2100
#define ERR_INTERNAL_ERROR 2200
@@ -423,11 +426,12 @@ struct RGWUserInfo
__u8 system;
string default_placement;
list<string> placement_tags;
+ RGWQuotaInfo bucket_quota;
RGWUserInfo() : auid(0), suspended(0), max_buckets(RGW_DEFAULT_MAX_BUCKETS), op_mask(RGW_OP_TYPE_ALL), system(0) {}
void encode(bufferlist& bl) const {
- ENCODE_START(13, 9, bl);
+ ENCODE_START(14, 9, bl);
::encode(auid, bl);
string access_key;
string secret_key;
@@ -462,6 +466,7 @@ struct RGWUserInfo
::encode(system, bl);
::encode(default_placement, bl);
::encode(placement_tags, bl);
+ ::encode(bucket_quota, bl);
ENCODE_FINISH(bl);
}
void decode(bufferlist::iterator& bl) {
@@ -518,6 +523,9 @@ struct RGWUserInfo
::decode(default_placement, bl);
::decode(placement_tags, bl); /* tags of allowed placement rules */
}
+ if (struct_v >= 14) {
+ ::decode(bucket_quota, bl);
+ }
DECODE_FINISH(bl);
}
void dump(Formatter *f) const;
@@ -599,6 +607,10 @@ struct rgw_bucket {
void dump(Formatter *f) const;
void decode_json(JSONObj *obj);
static void generate_test_instances(list<rgw_bucket*>& o);
+
+ bool operator<(const rgw_bucket& b) const {
+ return name.compare(b.name) < 0;
+ }
};
WRITE_CLASS_ENCODER(rgw_bucket)
@@ -661,9 +673,10 @@ struct RGWBucketInfo
bool has_instance_obj;
RGWObjVersionTracker objv_tracker; /* we don't need to serialize this, for runtime tracking */
obj_version ep_objv; /* entry point object version, for runtime tracking only */
+ RGWQuotaInfo quota;
void encode(bufferlist& bl) const {
- ENCODE_START(8, 4, bl);
+ ENCODE_START(9, 4, bl);
::encode(bucket, bl);
::encode(owner, bl);
::encode(flags, bl);
@@ -672,6 +685,7 @@ struct RGWBucketInfo
::encode(ct, bl);
::encode(placement_rule, bl);
::encode(has_instance_obj, bl);
+ ::encode(quota, bl);
ENCODE_FINISH(bl);
}
void decode(bufferlist::iterator& bl) {
@@ -692,6 +706,8 @@ struct RGWBucketInfo
::decode(placement_rule, bl);
if (struct_v >= 8)
::decode(has_instance_obj, bl);
+ if (struct_v >= 9)
+ ::decode(quota, bl);
DECODE_FINISH(bl);
}
void dump(Formatter *f) const;
@@ -754,6 +770,8 @@ struct RGWBucketStats
uint64_t num_kb;
uint64_t num_kb_rounded;
uint64_t num_objects;
+
+ RGWBucketStats() : num_kb(0), num_kb_rounded(0), num_objects(0) {}
};
struct req_state;
@@ -1213,6 +1231,11 @@ static inline const char *rgw_obj_category_name(RGWObjCategory category)
return "unknown";
}
+static inline uint64_t rgw_rounded_kb(uint64_t bytes)
+{
+ return (bytes + 1023) / 1024;
+}
+
extern string rgw_string_unquote(const string& s);
extern void parse_csv_string(const string& ival, vector<string>& ovals);
extern int parse_key_value(string& in_str, string& key, string& val);
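
rgw_rounded_kb() above rounds a byte count up to whole kibibytes; it is what converts --max-size (given in bytes) into max_size_kb and what the usage accounting compares against. A quick standalone check of the rounding behaviour:

    #include <cstdint>
    #include <iostream>

    // Same arithmetic as rgw_rounded_kb(): round bytes up to the next whole KiB.
    static uint64_t rounded_kb(uint64_t bytes) {
      return (bytes + 1023) / 1024;
    }

    int main() {
      std::cout << rounded_kb(0) << "\n";      // 0
      std::cout << rounded_kb(1) << "\n";      // 1   (a partial KiB counts as one)
      std::cout << rounded_kb(1024) << "\n";   // 1
      std::cout << rounded_kb(1025) << "\n";   // 2
      return 0;
    }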
diff --git a/src/rgw/rgw_http_errors.h b/src/rgw/rgw_http_errors.h
index 6cb9fabf6c0..ba3e522651f 100644
--- a/src/rgw/rgw_http_errors.h
+++ b/src/rgw/rgw_http_errors.h
@@ -36,6 +36,7 @@ const static struct rgw_http_errors RGW_HTTP_ERRORS[] = {
{ EPERM, 403, "AccessDenied" },
{ ERR_USER_SUSPENDED, 403, "UserSuspended" },
{ ERR_REQUEST_TIME_SKEWED, 403, "RequestTimeTooSkewed" },
+ { ERR_QUOTA_EXCEEDED, 403, "QuotaExceeded" },
{ ENOENT, 404, "NoSuchKey" },
{ ERR_NO_SUCH_BUCKET, 404, "NoSuchBucket" },
{ ERR_NO_SUCH_UPLOAD, 404, "NoSuchUpload" },
diff --git a/src/rgw/rgw_json_enc.cc b/src/rgw/rgw_json_enc.cc
index 189e9ae961e..4d6b25374b9 100644
--- a/src/rgw/rgw_json_enc.cc
+++ b/src/rgw/rgw_json_enc.cc
@@ -396,6 +396,7 @@ void RGWUserInfo::dump(Formatter *f) const
}
encode_json("default_placement", default_placement, f);
encode_json("placement_tags", placement_tags, f);
+ encode_json("bucket_quota", bucket_quota, f);
}
@@ -446,6 +447,21 @@ void RGWUserInfo::decode_json(JSONObj *obj)
system = (__u8)sys;
JSONDecoder::decode_json("default_placement", default_placement, obj);
JSONDecoder::decode_json("placement_tags", placement_tags, obj);
+ JSONDecoder::decode_json("bucket_quota", bucket_quota, obj);
+}
+
+void RGWQuotaInfo::dump(Formatter *f) const
+{
+ f->dump_bool("enabled", enabled);
+ f->dump_int("max_size_kb", max_size_kb);
+ f->dump_int("max_objects", max_objects);
+}
+
+void RGWQuotaInfo::decode_json(JSONObj *obj)
+{
+ JSONDecoder::decode_json("max_size_kb", max_size_kb, obj);
+ JSONDecoder::decode_json("max_objects", max_objects, obj);
+ JSONDecoder::decode_json("enabled", enabled, obj);
}
void rgw_bucket::dump(Formatter *f) const
@@ -497,6 +513,7 @@ void RGWBucketInfo::dump(Formatter *f) const
encode_json("region", region, f);
encode_json("placement_rule", placement_rule, f);
encode_json("has_instance_obj", has_instance_obj, f);
+ encode_json("quota", quota, f);
}
void RGWBucketInfo::decode_json(JSONObj *obj) {
@@ -507,6 +524,7 @@ void RGWBucketInfo::decode_json(JSONObj *obj) {
JSONDecoder::decode_json("region", region, obj);
JSONDecoder::decode_json("placement_rule", placement_rule, obj);
JSONDecoder::decode_json("has_instance_obj", has_instance_obj, obj);
+ JSONDecoder::decode_json("quota", quota, obj);
}
void RGWObjEnt::dump(Formatter *f) const
@@ -673,12 +691,14 @@ void RGWRegionMap::dump(Formatter *f) const
{
encode_json("regions", regions, f);
encode_json("master_region", master_region, f);
+ encode_json("bucket_quota", bucket_quota, f);
}
void RGWRegionMap::decode_json(JSONObj *obj)
{
JSONDecoder::decode_json("regions", regions, obj);
JSONDecoder::decode_json("master_region", master_region, obj);
+ JSONDecoder::decode_json("bucket_quota", bucket_quota, obj);
}
void RGWMetadataLogInfo::dump(Formatter *f) const
diff --git a/src/rgw/rgw_main.cc b/src/rgw/rgw_main.cc
index 54db609521c..acaa5deffee 100644
--- a/src/rgw/rgw_main.cc
+++ b/src/rgw/rgw_main.cc
@@ -357,6 +357,13 @@ void RGWProcess::handle_request(RGWRequest *req)
goto done;
}
+ req->log(s, "init op");
+ ret = op->init_processing();
+ if (ret < 0) {
+ abort_early(s, op, ret);
+ goto done;
+ }
+
req->log(s, "verifying op mask");
ret = op->verify_op_mask();
if (ret < 0) {
diff --git a/src/rgw/rgw_metadata.cc b/src/rgw/rgw_metadata.cc
index ca5ad3f2e7a..23f73e26531 100644
--- a/src/rgw/rgw_metadata.cc
+++ b/src/rgw/rgw_metadata.cc
@@ -1,7 +1,7 @@
-#include "rgw_metadata.h"
#include "common/ceph_json.h"
+#include "rgw_metadata.h"
#include "cls/version/cls_version_types.h"
#include "rgw_rados.h"
diff --git a/src/rgw/rgw_op.cc b/src/rgw/rgw_op.cc
index 114b8709a22..2e07e3fcde6 100644
--- a/src/rgw/rgw_op.cc
+++ b/src/rgw/rgw_op.cc
@@ -421,6 +421,47 @@ int RGWOp::verify_op_mask()
return 0;
}
+int RGWOp::init_quota()
+{
+ /* no quota enforcement for system requests */
+ if (s->system_request)
+ return 0;
+
+ /* init quota related stuff */
+ if (!(s->user.op_mask & RGW_OP_TYPE_MODIFY)) {
+ return 0;
+ }
+
+ /* only interested in object related ops */
+ if (s->object_str.empty()) {
+ return 0;
+ }
+
+ if (s->bucket_info.quota.enabled) {
+ bucket_quota = s->bucket_info.quota;
+ return 0;
+ }
+ if (s->user.user_id == s->bucket_owner.get_id()) {
+ if (s->user.bucket_quota.enabled) {
+ bucket_quota = s->user.bucket_quota;
+ return 0;
+ }
+ } else {
+ RGWUserInfo owner_info;
+ int r = rgw_get_user_info_by_uid(store, s->bucket_info.owner, owner_info);
+ if (r < 0)
+ return r;
+
+ if (owner_info.bucket_quota.enabled) {
+ bucket_quota = owner_info.bucket_quota;
+ return 0;
+ }
+ }
+
+ bucket_quota = store->region_map.bucket_quota;
+ return 0;
+}
+
static bool validate_cors_rule_method(RGWCORSRule *rule, const char *req_meth) {
uint8_t flags = 0;
if (strcmp(req_meth, "GET") == 0) flags = RGW_CORS_GET;
@@ -1363,6 +1404,14 @@ void RGWPutObj::execute()
ldout(s->cct, 15) << "supplied_md5=" << supplied_md5 << dendl;
}
+ if (!chunked_upload) { /* with chunked upload we don't know how big the upload is.
+ we also check sizes at the end anyway */
+ ret = store->check_quota(s->bucket, bucket_quota, s->content_length);
+ if (ret < 0) {
+ goto done;
+ }
+ }
+
if (supplied_etag) {
strncpy(supplied_md5, supplied_etag, sizeof(supplied_md5) - 1);
supplied_md5[sizeof(supplied_md5) - 1] = '\0';
@@ -1407,6 +1456,11 @@ void RGWPutObj::execute()
s->obj_size = ofs;
perfcounter->inc(l_rgw_put_b, s->obj_size);
+ ret = store->check_quota(s->bucket, bucket_quota, s->obj_size);
+ if (ret < 0) {
+ goto done;
+ }
+
hash.Final(m);
buf_to_hex(m, CEPH_CRYPTO_MD5_DIGESTSIZE, calc_md5);
@@ -1604,6 +1658,13 @@ void RGWPutMetadata::execute()
}
}
+ map<string, string>::iterator giter;
+ for (giter = s->generic_attrs.begin(); giter != s->generic_attrs.end(); ++giter) {
+ bufferlist& attrbl = attrs[giter->first];
+ const string& val = giter->second;
+ attrbl.append(val.c_str(), val.size() + 1);
+ }
+
if (has_policy) {
policy.encode(bl);
attrs[RGW_ATTR_ACL] = bl;
diff --git a/src/rgw/rgw_op.h b/src/rgw/rgw_op.h
index 948a11830c2..eee5ea99065 100644
--- a/src/rgw/rgw_op.h
+++ b/src/rgw/rgw_op.h
@@ -20,6 +20,7 @@
#include "rgw_bucket.h"
#include "rgw_acl.h"
#include "rgw_cors.h"
+#include "rgw_quota.h"
using namespace std;
@@ -36,10 +37,21 @@ protected:
RGWRados *store;
RGWCORSConfiguration bucket_cors;
bool cors_exist;
+ RGWQuotaInfo bucket_quota;
+
+ virtual int init_quota();
public:
RGWOp() : s(NULL), dialect_handler(NULL), store(NULL), cors_exist(false) {}
virtual ~RGWOp() {}
+ virtual int init_processing() {
+ int ret = init_quota();
+ if (ret < 0)
+ return ret;
+
+ return 0;
+ }
+
virtual void init(RGWRados *store, struct req_state *s, RGWHandler *dialect_handler) {
this->store = store;
this->s = s;
diff --git a/src/rgw/rgw_quota.cc b/src/rgw/rgw_quota.cc
new file mode 100644
index 00000000000..66609ca723c
--- /dev/null
+++ b/src/rgw/rgw_quota.cc
@@ -0,0 +1,332 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2013 Inktank, Inc
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+
+#include "include/utime.h"
+#include "common/lru_map.h"
+#include "common/RefCountedObj.h"
+
+#include "rgw_common.h"
+#include "rgw_rados.h"
+#include "rgw_quota.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+
+struct RGWQuotaBucketStats {
+ RGWBucketStats stats;
+ utime_t expiration;
+ utime_t async_refresh_time;
+};
+
+class RGWBucketStatsCache {
+ RGWRados *store;
+ lru_map<rgw_bucket, RGWQuotaBucketStats> stats_map;
+ RefCountedWaitObject *async_refcount;
+
+ int fetch_bucket_totals(rgw_bucket& bucket, RGWBucketStats& stats);
+
+public:
+ RGWBucketStatsCache(RGWRados *_store) : store(_store), stats_map(store->ctx()->_conf->rgw_bucket_quota_cache_size) {
+ async_refcount = new RefCountedWaitObject;
+ }
+ ~RGWBucketStatsCache() {
+ async_refcount->put_wait(); /* wait for all pending async requests to complete */
+ }
+
+ int get_bucket_stats(rgw_bucket& bucket, RGWBucketStats& stats, RGWQuotaInfo& quota);
+ void adjust_bucket_stats(rgw_bucket& bucket, int objs_delta, uint64_t added_bytes, uint64_t removed_bytes);
+
+ bool can_use_cached_stats(RGWQuotaInfo& quota, RGWBucketStats& stats);
+
+ void set_stats(rgw_bucket& bucket, RGWQuotaBucketStats& qs, RGWBucketStats& stats);
+ int async_refresh(rgw_bucket& bucket, RGWQuotaBucketStats& qs);
+ void async_refresh_response(rgw_bucket& bucket, RGWBucketStats& stats);
+};
+
+bool RGWBucketStatsCache::can_use_cached_stats(RGWQuotaInfo& quota, RGWBucketStats& cached_stats)
+{
+ if (quota.max_size_kb >= 0) {
+ if (quota.max_size_soft_threshold < 0) {
+ quota.max_size_soft_threshold = quota.max_size_kb * store->ctx()->_conf->rgw_bucket_quota_soft_threshold;
+ }
+
+ if (cached_stats.num_kb_rounded >= (uint64_t)quota.max_size_soft_threshold) {
+ ldout(store->ctx(), 20) << "quota: can't use cached stats, exceeded soft threshold (size): "
+ << cached_stats.num_kb_rounded << " >= " << quota.max_size_soft_threshold << dendl;
+ return false;
+ }
+ }
+
+ if (quota.max_objects >= 0) {
+ if (quota.max_objs_soft_threshold < 0) {
+ quota.max_objs_soft_threshold = quota.max_objects * store->ctx()->_conf->rgw_bucket_quota_soft_threshold;
+ }
+
+ if (cached_stats.num_objects >= (uint64_t)quota.max_objs_soft_threshold) {
+ ldout(store->ctx(), 20) << "quota: can't use cached stats, exceeded soft threshold (num objs): "
+ << cached_stats.num_objects << " >= " << quota.max_objs_soft_threshold << dendl;
+ return false;
+ }
+ }
+
+ return true;
+}
+
+int RGWBucketStatsCache::fetch_bucket_totals(rgw_bucket& bucket, RGWBucketStats& stats)
+{
+ RGWBucketInfo bucket_info;
+
+ uint64_t bucket_ver;
+ uint64_t master_ver;
+
+ map<RGWObjCategory, RGWBucketStats> bucket_stats;
+ int r = store->get_bucket_stats(bucket, &bucket_ver, &master_ver, bucket_stats, NULL);
+ if (r < 0) {
+ ldout(store->ctx(), 0) << "could not get bucket info for bucket=" << bucket.name << dendl;
+ return r;
+ }
+
+ stats = RGWBucketStats();
+
+ map<RGWObjCategory, RGWBucketStats>::iterator iter;
+ for (iter = bucket_stats.begin(); iter != bucket_stats.end(); ++iter) {
+ RGWBucketStats& s = iter->second;
+ stats.num_kb += s.num_kb;
+ stats.num_kb_rounded += s.num_kb_rounded;
+ stats.num_objects += s.num_objects;
+ }
+
+ return 0;
+}
+
+class AsyncRefreshHandler : public RGWGetBucketStats_CB {
+ RGWRados *store;
+ RGWBucketStatsCache *cache;
+public:
+ AsyncRefreshHandler(RGWRados *_store, RGWBucketStatsCache *_cache, rgw_bucket& _bucket) : RGWGetBucketStats_CB(_bucket), store(_store), cache(_cache) {}
+
+ int init_fetch();
+
+ void handle_response(int r);
+};
+
+
+int AsyncRefreshHandler::init_fetch()
+{
+ ldout(store->ctx(), 20) << "initiating async quota refresh for bucket=" << bucket << dendl;
+ map<RGWObjCategory, RGWBucketStats> bucket_stats;
+ int r = store->get_bucket_stats_async(bucket, this);
+ if (r < 0) {
+ ldout(store->ctx(), 0) << "could not get bucket info for bucket=" << bucket.name << dendl;
+
+ /* get_bucket_stats_async() dropped our reference already */
+ return r;
+ }
+
+ return 0;
+}
+
+void AsyncRefreshHandler::handle_response(int r)
+{
+ if (r < 0) {
+ ldout(store->ctx(), 20) << "AsyncRefreshHandler::handle_response() r=" << r << dendl;
+ return; /* nothing to do here */
+ }
+
+ RGWBucketStats bs;
+
+ map<RGWObjCategory, RGWBucketStats>::iterator iter;
+ for (iter = stats->begin(); iter != stats->end(); ++iter) {
+ RGWBucketStats& s = iter->second;
+ bs.num_kb += s.num_kb;
+ bs.num_kb_rounded += s.num_kb_rounded;
+ bs.num_objects += s.num_objects;
+ }
+
+ cache->async_refresh_response(bucket, bs);
+}
+
+class RGWBucketStatsAsyncTestSet : public lru_map<rgw_bucket, RGWQuotaBucketStats>::UpdateContext {
+ int objs_delta;
+ uint64_t added_bytes;
+ uint64_t removed_bytes;
+public:
+ RGWBucketStatsAsyncTestSet() {}
+ bool update(RGWQuotaBucketStats *entry) {
+ if (entry->async_refresh_time.sec() == 0)
+ return false;
+
+ entry->async_refresh_time = utime_t(0, 0);
+
+ return true;
+ }
+};
+
+int RGWBucketStatsCache::async_refresh(rgw_bucket& bucket, RGWQuotaBucketStats& qs)
+{
+ /* protect against multiple updates */
+ RGWBucketStatsAsyncTestSet test_update;
+ if (!stats_map.find_and_update(bucket, NULL, &test_update)) {
+ /* most likely we just raced with another update */
+ return 0;
+ }
+
+ async_refcount->get();
+
+ AsyncRefreshHandler *handler = new AsyncRefreshHandler(store, this, bucket);
+
+ int ret = handler->init_fetch();
+ if (ret < 0) {
+ async_refcount->put();
+ handler->put();
+ return ret;
+ }
+
+ return 0;
+}
+
+void RGWBucketStatsCache::async_refresh_response(rgw_bucket& bucket, RGWBucketStats& stats)
+{
+ ldout(store->ctx(), 20) << "async stats refresh response for bucket=" << bucket << dendl;
+
+ RGWQuotaBucketStats qs;
+
+ stats_map.find(bucket, qs);
+
+ set_stats(bucket, qs, stats);
+
+ async_refcount->put();
+}
+
+void RGWBucketStatsCache::set_stats(rgw_bucket& bucket, RGWQuotaBucketStats& qs, RGWBucketStats& stats)
+{
+ qs.stats = stats;
+ qs.expiration = ceph_clock_now(store->ctx());
+ qs.async_refresh_time = qs.expiration;
+ qs.expiration += store->ctx()->_conf->rgw_bucket_quota_ttl;
+ qs.async_refresh_time += store->ctx()->_conf->rgw_bucket_quota_ttl / 2;
+
+ stats_map.add(bucket, qs);
+}
+
+int RGWBucketStatsCache::get_bucket_stats(rgw_bucket& bucket, RGWBucketStats& stats, RGWQuotaInfo& quota) {
+ RGWQuotaBucketStats qs;
+ utime_t now = ceph_clock_now(store->ctx());
+ if (stats_map.find(bucket, qs)) {
+ if (qs.async_refresh_time.sec() > 0 && now >= qs.async_refresh_time) {
+ int r = async_refresh(bucket, qs);
+ if (r < 0) {
+ ldout(store->ctx(), 0) << "ERROR: quota async refresh returned ret=" << r << dendl;
+
+ /* continue processing, might be a transient error, async refresh is just an optimization */
+ }
+ }
+
+ if (can_use_cached_stats(quota, qs.stats) && qs.expiration > ceph_clock_now(store->ctx())) {
+ stats = qs.stats;
+ return 0;
+ }
+ }
+
+ int ret = fetch_bucket_totals(bucket, stats);
+ if (ret < 0 && ret != -ENOENT)
+ return ret;
+
+ set_stats(bucket, qs, stats);
+
+ return 0;
+}
+
+
+class RGWBucketStatsUpdate : public lru_map<rgw_bucket, RGWQuotaBucketStats>::UpdateContext {
+ int objs_delta;
+ uint64_t added_bytes;
+ uint64_t removed_bytes;
+public:
+ RGWBucketStatsUpdate(int _objs_delta, uint64_t _added_bytes, uint64_t _removed_bytes) :
+ objs_delta(_objs_delta), added_bytes(_added_bytes), removed_bytes(_removed_bytes) {}
+ bool update(RGWQuotaBucketStats *entry) {
+ uint64_t rounded_kb_added = rgw_rounded_kb(added_bytes);
+ uint64_t rounded_kb_removed = rgw_rounded_kb(removed_bytes);
+
+ entry->stats.num_kb_rounded += (rounded_kb_added - rounded_kb_removed);
+ entry->stats.num_kb += (added_bytes - removed_bytes) / 1024;
+ entry->stats.num_objects += objs_delta;
+
+ return true;
+ }
+};
+
+
+void RGWBucketStatsCache::adjust_bucket_stats(rgw_bucket& bucket, int objs_delta, uint64_t added_bytes, uint64_t removed_bytes)
+{
+ RGWBucketStatsUpdate update(objs_delta, added_bytes, removed_bytes);
+ stats_map.find_and_update(bucket, NULL, &update);
+}
+
+
+class RGWQuotaHandlerImpl : public RGWQuotaHandler {
+ RGWRados *store;
+ RGWBucketStatsCache stats_cache;
+public:
+ RGWQuotaHandlerImpl(RGWRados *_store) : store(_store), stats_cache(_store) {}
+ virtual int check_quota(rgw_bucket& bucket, RGWQuotaInfo& bucket_quota,
+ uint64_t num_objs, uint64_t size) {
+ uint64_t size_kb = rgw_rounded_kb(size);
+ if (!bucket_quota.enabled) {
+ return 0;
+ }
+
+ RGWBucketStats stats;
+
+ int ret = stats_cache.get_bucket_stats(bucket, stats, bucket_quota);
+ if (ret < 0)
+ return ret;
+
+ ldout(store->ctx(), 20) << "bucket quota: max_objects=" << bucket_quota.max_objects
+ << " max_size_kb=" << bucket_quota.max_size_kb << dendl;
+
+ if (bucket_quota.max_objects >= 0 &&
+ stats.num_objects + num_objs > (uint64_t)bucket_quota.max_objects) {
+ ldout(store->ctx(), 10) << "quota exceeded: stats.num_objects=" << stats.num_objects
+ << " bucket_quota.max_objects=" << bucket_quota.max_objects << dendl;
+
+ return -ERR_QUOTA_EXCEEDED;
+ }
+ if (bucket_quota.max_size_kb >= 0 &&
+ stats.num_kb_rounded + size_kb > (uint64_t)bucket_quota.max_size_kb) {
+ ldout(store->ctx(), 10) << "quota exceeded: stats.num_kb_rounded=" << stats.num_kb_rounded << " size_kb=" << size_kb
+ << " bucket_quota.max_size_kb=" << bucket_quota.max_size_kb << dendl;
+ return -ERR_QUOTA_EXCEEDED;
+ }
+
+ return 0;
+ }
+
+ virtual void update_stats(rgw_bucket& bucket, int obj_delta, uint64_t added_bytes, uint64_t removed_bytes) {
+ stats_cache.adjust_bucket_stats(bucket, obj_delta, added_bytes, removed_bytes);
+ };
+};
+
+
+RGWQuotaHandler *RGWQuotaHandler::generate_handler(RGWRados *store)
+{
+ return new RGWQuotaHandlerImpl(store);
+};
+
+void RGWQuotaHandler::free_handler(RGWQuotaHandler *handler)
+{
+ delete handler;
+}
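
The enforcement itself is RGWQuotaHandlerImpl::check_quota() above: take the cached bucket totals, add the pending operation, and reject once either the object count or the rounded size would exceed its limit, with -1 meaning "no limit". Below is a minimal sketch of just that decision, with hypothetical plain structs standing in for RGWBucketStats and RGWQuotaInfo:

    #include <cstdint>
    #include <iostream>

    // Hypothetical stand-ins for the cached stats and the quota record.
    struct Stats { uint64_t num_objects = 0, num_kb_rounded = 0; };
    struct Quota { bool enabled = false; int64_t max_objects = -1, max_size_kb = -1; };

    constexpr int ERR_QUOTA_EXCEEDED_SKETCH = 2026;  // mirrors ERR_QUOTA_EXCEEDED

    // Same shape as check_quota(): 0 if the op fits, negative error if it does not.
    int check_quota(const Quota &q, const Stats &s, uint64_t new_objs, uint64_t new_kb) {
      if (!q.enabled)
        return 0;
      if (q.max_objects >= 0 &&
          s.num_objects + new_objs > (uint64_t)q.max_objects)
        return -ERR_QUOTA_EXCEEDED_SKETCH;   // object-count limit would be exceeded
      if (q.max_size_kb >= 0 &&
          s.num_kb_rounded + new_kb > (uint64_t)q.max_size_kb)
        return -ERR_QUOTA_EXCEEDED_SKETCH;   // size limit would be exceeded
      return 0;
    }

    int main() {
      Quota q; q.enabled = true; q.max_objects = 10; q.max_size_kb = 100;
      Stats s; s.num_objects = 10; s.num_kb_rounded = 40;
      std::cout << check_quota(q, s, 1, 1) << "\n";  // object limit hit: negative
      s.num_objects = 5;
      std::cout << check_quota(q, s, 1, 1) << "\n";  // fits: 0
      return 0;
    }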
diff --git a/src/rgw/rgw_quota.h b/src/rgw/rgw_quota.h
new file mode 100644
index 00000000000..2f8f28e85a2
--- /dev/null
+++ b/src/rgw/rgw_quota.h
@@ -0,0 +1,74 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2013 Inktank, Inc
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef CEPH_RGW_QUOTA_H
+#define CEPH_RGW_QUOTA_H
+
+
+#include "include/utime.h"
+#include "include/atomic.h"
+#include "common/lru_map.h"
+
+class RGWRados;
+class JSONObj;
+
+struct RGWQuotaInfo {
+ int64_t max_size_kb;
+ int64_t max_objects;
+ bool enabled;
+ int64_t max_size_soft_threshold;
+ int64_t max_objs_soft_threshold;
+
+ RGWQuotaInfo() : max_size_kb(-1), max_objects(-1), enabled(false),
+ max_size_soft_threshold(-1), max_objs_soft_threshold(-1) {}
+
+ void encode(bufferlist& bl) const {
+ ENCODE_START(1, 1, bl);
+ ::encode(max_size_kb, bl);
+ ::encode(max_objects, bl);
+ ::encode(enabled, bl);
+ ENCODE_FINISH(bl);
+ }
+ void decode(bufferlist::iterator& bl) {
+ DECODE_START(1, bl);
+ ::decode(max_size_kb, bl);
+ ::decode(max_objects, bl);
+ ::decode(enabled, bl);
+ DECODE_FINISH(bl);
+ }
+
+ void dump(Formatter *f) const;
+
+ void decode_json(JSONObj *obj);
+
+};
+WRITE_CLASS_ENCODER(RGWQuotaInfo)
+
+class rgw_bucket;
+
+class RGWQuotaHandler {
+public:
+ RGWQuotaHandler() {}
+ virtual ~RGWQuotaHandler() {
+ }
+ virtual int check_quota(rgw_bucket& bucket, RGWQuotaInfo& bucket_quota,
+ uint64_t num_objs, uint64_t size) = 0;
+
+ virtual void update_stats(rgw_bucket& bucket, int obj_delta, uint64_t added_bytes, uint64_t removed_bytes) = 0;
+
+ static RGWQuotaHandler *generate_handler(RGWRados *store);
+ static void free_handler(RGWQuotaHandler *handler);
+};
+
+#endif
diff --git a/src/rgw/rgw_rados.cc b/src/rgw/rgw_rados.cc
index 8b4d18f4e68..9f0a900f3d3 100644
--- a/src/rgw/rgw_rados.cc
+++ b/src/rgw/rgw_rados.cc
@@ -357,16 +357,20 @@ int RGWZoneParams::store_info(CephContext *cct, RGWRados *store, RGWRegion& regi
}
void RGWRegionMap::encode(bufferlist& bl) const {
- ENCODE_START(1, 1, bl);
+ ENCODE_START(2, 1, bl);
::encode(regions, bl);
::encode(master_region, bl);
+ ::encode(bucket_quota, bl);
ENCODE_FINISH(bl);
}
void RGWRegionMap::decode(bufferlist::iterator& bl) {
- DECODE_START(1, bl);
+ DECODE_START(2, bl);
::decode(regions, bl);
::decode(master_region, bl);
+
+ if (struct_v >= 2)
+ ::decode(bucket_quota, bl);
DECODE_FINISH(bl);
regions_by_api.clear();
@@ -851,6 +855,7 @@ void RGWRados::finalize()
RGWRESTConn *conn = iter->second;
delete conn;
}
+ RGWQuotaHandler::free_handler(quota_handler);
}
/**
@@ -962,6 +967,8 @@ int RGWRados::init_complete()
if (use_gc_thread)
gc->start_processor();
+ quota_handler = RGWQuotaHandler::generate_handler(this);
+
return ret;
}
@@ -2342,6 +2349,11 @@ int RGWRados::put_obj_meta_impl(void *ctx, rgw_obj& obj, uint64_t size,
*mtime = set_mtime;
}
+ if (state) {
+ /* update quota cache */
+ quota_handler->update_stats(bucket, (state->exists ? 0 : 1), size, state->size);
+ }
+
return 0;
done_cancel:
@@ -3211,6 +3223,11 @@ int RGWRados::delete_obj_impl(void *ctx, rgw_obj& obj, RGWObjVersionTracker *obj
if (ret_not_existed)
return -ENOENT;
+ if (state) {
+ /* update quota cache */
+ quota_handler->update_stats(bucket, -1, 0, state->size);
+ }
+
return 0;
}
@@ -4598,6 +4615,38 @@ int RGWRados::get_bucket_stats(rgw_bucket& bucket, uint64_t *bucket_ver, uint64_
return 0;
}
+class RGWGetBucketStatsContext : public RGWGetDirHeader_CB {
+ RGWGetBucketStats_CB *cb;
+
+public:
+ RGWGetBucketStatsContext(RGWGetBucketStats_CB *_cb) : cb(_cb) {}
+ void handle_response(int r, rgw_bucket_dir_header& header) {
+ map<RGWObjCategory, RGWBucketStats> stats;
+
+ if (r >= 0) {
+ translate_raw_stats(header, stats);
+ cb->set_response(header.ver, header.master_ver, &stats, header.max_marker);
+ }
+
+ cb->handle_response(r);
+
+ cb->put();
+ }
+};
+
+int RGWRados::get_bucket_stats_async(rgw_bucket& bucket, RGWGetBucketStats_CB *ctx)
+{
+ RGWGetBucketStatsContext *get_ctx = new RGWGetBucketStatsContext(ctx);
+ int r = cls_bucket_head_async(bucket, get_ctx);
+ if (r < 0) {
+ ctx->put();
+ delete get_ctx;
+ return r;
+ }
+
+ return 0;
+}
+
void RGWRados::get_bucket_instance_entry(rgw_bucket& bucket, string& entry)
{
entry = bucket.name + ":" + bucket.bucket_id;
@@ -5480,6 +5529,25 @@ int RGWRados::cls_bucket_head(rgw_bucket& bucket, struct rgw_bucket_dir_header&
return 0;
}
+int RGWRados::cls_bucket_head_async(rgw_bucket& bucket, RGWGetDirHeader_CB *ctx)
+{
+ librados::IoCtx index_ctx;
+ string oid;
+ int r = open_bucket_index(bucket, index_ctx, oid);
+ if (r < 0)
+ return r;
+
+ r = cls_rgw_get_dir_header_async(index_ctx, oid, ctx);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int RGWRados::check_quota(rgw_bucket& bucket, RGWQuotaInfo& quota_info, uint64_t obj_size)
+{
+ return quota_handler->check_quota(bucket, quota_info, 1, obj_size);
+}
class IntentLogNameFilter : public RGWAccessListFilter
{
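
RGWRegionMap above bumps its encoding from version 1 to 2 and decodes the new bucket_quota field only when struct_v >= 2, so blobs written by older daemons still decode cleanly. The toy example below illustrates that compat-versioning idea with a hand-rolled byte buffer; it deliberately does not use Ceph's ENCODE_START/DECODE_START macros, which additionally handle compat versions and length framing.

    #include <cstdint>
    #include <iostream>
    #include <vector>

    // Toy record: v1 had only `count`; v2 added `quota_kb`.
    struct Record { uint32_t count = 0; int64_t quota_kb = -1; };

    // Encode with a leading version byte (a very loose analogue of ENCODE_START).
    std::vector<uint8_t> encode_v2(const Record &r) {
      std::vector<uint8_t> bl;
      bl.push_back(2);                                           // struct_v
      for (int i = 0; i < 4; ++i) bl.push_back((r.count >> (8 * i)) & 0xff);
      for (int i = 0; i < 8; ++i) bl.push_back((uint64_t(r.quota_kb) >> (8 * i)) & 0xff);
      return bl;
    }

    // Decode either version: the new field is read only when struct_v >= 2.
    Record decode(const std::vector<uint8_t> &bl) {
      Record r;
      size_t p = 0;
      uint8_t struct_v = bl[p++];
      for (int i = 0; i < 4; ++i) r.count |= uint32_t(bl[p++]) << (8 * i);
      if (struct_v >= 2) {
        uint64_t q = 0;
        for (int i = 0; i < 8; ++i) q |= uint64_t(bl[p++]) << (8 * i);
        r.quota_kb = int64_t(q);
      }
      return r;
    }

    int main() {
      Record r; r.count = 7; r.quota_kb = 512;
      Record back = decode(encode_v2(r));
      std::cout << back.count << " " << back.quota_kb << "\n";  // 7 512

      std::vector<uint8_t> v1 = {1, 7, 0, 0, 0};                // old encoding, no quota
      Record old = decode(v1);
      std::cout << old.count << " " << old.quota_kb << "\n";    // 7 -1 (default kept)
      return 0;
    }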
diff --git a/src/rgw/rgw_rados.h b/src/rgw/rgw_rados.h
index 65765c414aa..52b898123d4 100644
--- a/src/rgw/rgw_rados.h
+++ b/src/rgw/rgw_rados.h
@@ -636,6 +636,8 @@ struct RGWRegionMap {
string master_region;
+ RGWQuotaInfo bucket_quota;
+
RGWRegionMap() : lock("RGWRegionMap") {}
void encode(bufferlist& bl) const;
@@ -759,6 +761,29 @@ public:
int renew_state();
};
+class RGWGetBucketStats_CB : public RefCountedObject {
+protected:
+ rgw_bucket bucket;
+ uint64_t bucket_ver;
+ uint64_t master_ver;
+ map<RGWObjCategory, RGWBucketStats> *stats;
+ string max_marker;
+public:
+ RGWGetBucketStats_CB(rgw_bucket& _bucket) : bucket(_bucket), stats(NULL) {}
+ virtual ~RGWGetBucketStats_CB() {}
+ virtual void handle_response(int r) = 0;
+ virtual void set_response(uint64_t _bucket_ver, uint64_t _master_ver,
+ map<RGWObjCategory, RGWBucketStats> *_stats,
+ const string &_max_marker) {
+ bucket_ver = _bucket_ver;
+ master_ver = _master_ver;
+ stats = _stats;
+ max_marker = _max_marker;
+ }
+};
+
+class RGWGetDirHeader_CB;
+
class RGWRados
{
@@ -862,6 +887,8 @@ protected:
string region_name;
string zone_name;
+ RGWQuotaHandler *quota_handler;
+
public:
RGWRados() : lock("rados_timer_lock"), timer(NULL),
gc(NULL), use_gc_thread(false),
@@ -870,6 +897,7 @@ public:
bucket_id_lock("rados_bucket_id"), max_bucket_id(0),
cct(NULL), rados(NULL),
pools_initialized(false),
+ quota_handler(NULL),
rest_master_conn(NULL),
meta_mgr(NULL), data_log(NULL) {}
@@ -1290,6 +1318,7 @@ public:
int decode_policy(bufferlist& bl, ACLOwner *owner);
int get_bucket_stats(rgw_bucket& bucket, uint64_t *bucket_ver, uint64_t *master_ver, map<RGWObjCategory, RGWBucketStats>& stats,
string *max_marker);
+ int get_bucket_stats_async(rgw_bucket& bucket, RGWGetBucketStats_CB *cb);
void get_bucket_instance_obj(rgw_bucket& bucket, rgw_obj& obj);
void get_bucket_instance_entry(rgw_bucket& bucket, string& entry);
void get_bucket_meta_oid(rgw_bucket& bucket, string& oid);
@@ -1321,6 +1350,7 @@ public:
map<string, RGWObjEnt>& m, bool *is_truncated,
string *last_entry, bool (*force_check_filter)(const string& name) = NULL);
int cls_bucket_head(rgw_bucket& bucket, struct rgw_bucket_dir_header& header);
+ int cls_bucket_head_async(rgw_bucket& bucket, RGWGetDirHeader_CB *ctx);
int prepare_update_index(RGWObjState *state, rgw_bucket& bucket,
RGWModifyOp op, rgw_obj& oid, string& tag);
int complete_update_index(rgw_bucket& bucket, string& oid, string& tag, int64_t poolid, uint64_t epoch, uint64_t size,
@@ -1376,6 +1406,8 @@ public:
int bucket_rebuild_index(rgw_bucket& bucket);
int remove_objs_from_index(rgw_bucket& bucket, list<string>& oid_list);
+ int check_quota(rgw_bucket& bucket, RGWQuotaInfo& quota_info, uint64_t obj_size);
+
string unique_id(uint64_t unique_num) {
char buf[32];
snprintf(buf, sizeof(buf), ".%llu.%llu", (unsigned long long)instance_id(), (unsigned long long)unique_num);
diff --git a/src/rgw/rgw_user.cc b/src/rgw/rgw_user.cc
index 5e5b5c564bb..dc529e3d48d 100644
--- a/src/rgw/rgw_user.cc
+++ b/src/rgw/rgw_user.cc
@@ -1682,6 +1682,9 @@ int RGWUser::execute_add(RGWUserAdminOpState& op_state, std::string *err_msg)
if (op_state.op_mask_specified)
user_info.op_mask = op_state.get_op_mask();
+ if (op_state.has_bucket_quota())
+ user_info.bucket_quota = op_state.get_bucket_quota();
+
// update the request
op_state.set_user_info(user_info);
op_state.set_populated();
@@ -1884,6 +1887,9 @@ int RGWUser::execute_modify(RGWUserAdminOpState& op_state, std::string *err_msg)
if (op_state.op_mask_specified)
user_info.op_mask = op_state.get_op_mask();
+ if (op_state.has_bucket_quota())
+ user_info.bucket_quota = op_state.get_bucket_quota();
+
if (op_state.has_suspension_op()) {
__u8 suspended = op_state.get_suspension_status();
user_info.suspended = suspended;
diff --git a/src/rgw/rgw_user.h b/src/rgw/rgw_user.h
index 32bcf199001..e71b8f81778 100644
--- a/src/rgw/rgw_user.h
+++ b/src/rgw/rgw_user.h
@@ -172,6 +172,10 @@ struct RGWUserAdminOpState {
bool subuser_params_checked;
bool user_params_checked;
+ bool bucket_quota_specified;
+
+ RGWQuotaInfo bucket_quota;
+
void set_access_key(std::string& access_key) {
if (access_key.empty())
return;
@@ -285,6 +289,12 @@ struct RGWUserAdminOpState {
key_op = true;
}
+ void set_bucket_quota(RGWQuotaInfo& quota)
+ {
+ bucket_quota = quota;
+ bucket_quota_specified = true;
+ }
+
bool is_populated() { return populated; };
bool is_initialized() { return initialized; };
bool has_existing_user() { return existing_user; };
@@ -303,6 +313,7 @@ struct RGWUserAdminOpState {
bool will_purge_keys() { return purge_keys; };
bool will_purge_data() { return purge_data; };
bool will_generate_subuser() { return gen_subuser; };
+ bool has_bucket_quota() { return bucket_quota_specified; }
void set_populated() { populated = true; };
void clear_populated() { populated = false; };
void set_initialized() { initialized = true; };
@@ -317,6 +328,7 @@ struct RGWUserAdminOpState {
uint32_t get_subuser_perm() { return perm_mask; };
uint32_t get_max_buckets() { return max_buckets; };
uint32_t get_op_mask() { return op_mask; };
+ RGWQuotaInfo& get_bucket_quota() { return bucket_quota; }
std::string get_user_id() { return user_id; };
std::string get_subuser() { return subuser; };
@@ -403,6 +415,7 @@ struct RGWUserAdminOpState {
key_params_checked = false;
subuser_params_checked = false;
user_params_checked = false;
+ bucket_quota_specified = false;
}
};
diff --git a/src/test/cli/radosgw-admin/help.t b/src/test/cli/radosgw-admin/help.t
index 2def60107dc..4fe30b1cda7 100644
--- a/src/test/cli/radosgw-admin/help.t
+++ b/src/test/cli/radosgw-admin/help.t
@@ -23,6 +23,9 @@
bucket check check bucket index
object rm remove object
object unlink unlink object from bucket index
+ quota set set quota params
+ quota enable enable quota
+ quota disable disable quota
region get show region info
regions list list all regions set on this cluster
region set set region info (requires infile)
@@ -116,6 +119,12 @@
<date> := "YYYY-MM-DD[ hh:mm:ss]"
+ Quota options:
+ --bucket specified bucket for quota command
+ --max-objects specify max objects
+ --max-size specify max size (in bytes)
+ --quota-scope scope of quota (bucket, user)
+
--conf/-c FILE read configuration from the given configuration file
--id/-i ID set ID portion of my name
--name/-n TYPE.ID set name
diff --git a/src/test/encoding/ceph_dencoder.cc b/src/test/encoding/ceph_dencoder.cc
index 81abcd1de9e..dbed6f524d8 100644
--- a/src/test/encoding/ceph_dencoder.cc
+++ b/src/test/encoding/ceph_dencoder.cc
@@ -93,7 +93,7 @@ public:
// allow 0- or 1-based (by wrapping)
if (i == 0)
i = m_list.size();
- if (i > m_list.size())
+ if ((i == 0) || (i > m_list.size()))
return "invalid id for generated object";
typename list<T*>::iterator p = m_list.begin();
for (i--; i > 0 && p != m_list.end(); ++p, --i) ;
@@ -177,7 +177,7 @@ public:
// allow 0- or 1-based (by wrapping)
if (i == 0)
i = m_list.size();
- if (i > m_list.size())
+ if ((i == 0) || (i > m_list.size()))
return "invalid id for generated object";
typename list<T*>::iterator p = m_list.begin();
for (i--; i > 0 && p != m_list.end(); ++p, --i) ;
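
The dencoder fix above guards the 0-or-1-based index wrap against an empty list: with an empty list, i wraps from 0 back to 0 and the old `i > m_list.size()` check let it through. A tiny reproduction of that boundary check on a plain std::list:

    #include <iostream>
    #include <iterator>
    #include <list>
    #include <string>

    // Select the i-th generated object, accepting 0- or 1-based indices by
    // wrapping 0 to size(); reject both the empty-list case and out-of-range i.
    std::string select(const std::list<int> &l, size_t i, int &out) {
      if (i == 0)
        i = l.size();                 // 0 means "last", i.e. 1-based size()
      if (i == 0 || i > l.size())     // i == 0 here means the list was empty
        return "invalid id for generated object";
      auto p = l.begin();
      std::advance(p, i - 1);
      out = *p;
      return "";
    }

    int main() {
      std::list<int> empty, three = {10, 20, 30};
      int v = 0;
      std::cout << select(empty, 0, v) << "\n";               // error: empty list
      std::cout << select(three, 0, v) << " " << v << "\n";   // wraps to the last entry, 30
      std::cout << select(three, 2, v) << " " << v << "\n";   // picks 20
      return 0;
    }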
diff --git a/src/test/filestore/run_seed_to_range.sh b/src/test/filestore/run_seed_to_range.sh
index c5b399d7aae..365b34918d2 100755
--- a/src/test/filestore/run_seed_to_range.sh
+++ b/src/test/filestore/run_seed_to_range.sh
@@ -12,7 +12,7 @@ mydir=`dirname $0`
for f in `seq $from $to`
do
if ! $mydir/run_seed_to.sh $seed $f; then
- if -d $dir; then
+ if [ -d $dir ]; then
echo copying evidence to $dir
cp -a . $dir
else
diff --git a/src/test/librados/cmd.cc b/src/test/librados/cmd.cc
index 71343f2b908..f47cc9fc7d2 100644
--- a/src/test/librados/cmd.cc
+++ b/src/test/librados/cmd.cc
@@ -100,8 +100,9 @@ TEST(LibRadosCmd, PGCmd) {
string pgid = stringify(poolid) + ".0";
cmd[0] = (char *)"asdfasdf";
- ASSERT_EQ(-22, rados_pg_command(cluster, pgid.c_str(), (const char **)cmd, 1, "", 0, &buf, &buflen, &st, &stlen));
-
+ // note: tolerate ENXIO here in case the cluster is thrashing out underneath us.
+ int r = rados_pg_command(cluster, pgid.c_str(), (const char **)cmd, 1, "", 0, &buf, &buflen, &st, &stlen);
+ ASSERT_TRUE(r == -22 || r == -ENXIO);
// make sure the pg exists on the osd before we query it
rados_ioctx_t io;
@@ -114,7 +115,9 @@ TEST(LibRadosCmd, PGCmd) {
string qstr = "{\"prefix\":\"pg\", \"cmd\":\"query\", \"pgid\":\"" + pgid + "\"}";
cmd[0] = (char *)qstr.c_str();
- ASSERT_EQ(0, rados_pg_command(cluster, pgid.c_str(), (const char **)cmd, 1, "", 0, &buf, &buflen, &st, &stlen));
+ // note: tolerate ENOENT/ENXIO here if the osd is thrashing out underneath us
+ r = rados_pg_command(cluster, pgid.c_str(), (const char **)cmd, 1, "", 0, &buf, &buflen, &st, &stlen);
+ ASSERT_TRUE(r == 0 || r == -ENOENT || r == -ENXIO);
ASSERT_LT(0u, buflen);
rados_buffer_free(buf);
diff --git a/src/test/pybind/test_ceph_argparse.py b/src/test/pybind/test_ceph_argparse.py
index 34bcf698e5a..540f690472b 100755
--- a/src/test/pybind/test_ceph_argparse.py
+++ b/src/test/pybind/test_ceph_argparse.py
@@ -460,10 +460,12 @@ class TestMDS(TestArgparse):
'toomany']))
def test_add_data_pool(self):
- self.check_1_natural_arg('mds', 'add_data_pool')
+ self.assert_valid_command(['mds', 'add_data_pool', '1'])
+ self.assert_valid_command(['mds', 'add_data_pool', 'foo'])
def test_remove_data_pool(self):
- self.check_1_natural_arg('mds', 'remove_data_pool')
+ self.assert_valid_command(['mds', 'remove_data_pool', '1'])
+ self.assert_valid_command(['mds', 'remove_data_pool', 'foo'])
def test_newfs(self):
self.assert_valid_command(['mds', 'newfs', '1', '2',
@@ -831,7 +833,7 @@ class TestOSD(TestArgparse):
uuid,
'toomany']))
- def test_blackist(self):
+ def test_blacklist(self):
for action in ('add', 'rm'):
self.assert_valid_command(['osd', 'blacklist', action,
'1.2.3.4/567'])
@@ -941,22 +943,17 @@ class TestOSD(TestArgparse):
def test_pool_set(self):
for var in ('size', 'min_size', 'crash_replay_interval',
- 'pg_num', 'pgp_num', 'crush_ruleset'):
+ 'pg_num', 'pgp_num', 'crush_ruleset',
+ 'hashpspool'):
self.assert_valid_command(['osd', 'pool',
- 'set', 'poolname', var, '-1'])
+ 'set', 'poolname', var, 'value'])
assert_equal({}, validate_command(sigdict, ['osd', 'pool',
'set']))
assert_equal({}, validate_command(sigdict, ['osd', 'pool',
'set', 'poolname']))
assert_equal({}, validate_command(sigdict, ['osd', 'pool',
'set', 'poolname',
- 'size', 'invalid']))
- assert_equal({}, validate_command(sigdict, ['osd', 'pool',
- 'set', 'poolname',
- 'invalid', '-1']))
- assert_equal({}, validate_command(sigdict, ['osd', 'pool',
- 'set', 'poolname',
- 'size', '-1',
+ 'size', 'value',
'toomany']))
def test_pool_set_quota(self):
diff --git a/src/vstart.sh b/src/vstart.sh
index def480779de..4839cc1156d 100755
--- a/src/vstart.sh
+++ b/src/vstart.sh
@@ -237,6 +237,7 @@ fi
$SUDO rm -f core*
test -d out || mkdir out
+test -d dev || mkdir dev
$SUDO rm -rf out/*
test -d gmon && $SUDO rm -rf gmon/*
@@ -390,7 +391,7 @@ EOF
cmd="rm -rf $CEPH_DEV_DIR/mon.$f"
echo $cmd
$cmd
- cmd="mkdir $CEPH_DEV_DIR/mon.$f"
+ cmd="mkdir -p $CEPH_DEV_DIR/mon.$f"
echo $cmd
$cmd
cmd="$CEPH_BIN/ceph-mon --mkfs -c $conf -i $f --monmap=$monmap_fn"