78 files changed, 2118 insertions, 784 deletions
diff --git a/PendingReleaseNotes b/PendingReleaseNotes index a3ec73290f3..a30cf8c6e17 100644 --- a/PendingReleaseNotes +++ b/PendingReleaseNotes @@ -21,3 +21,12 @@ v0.71 * Most output that used K or KB (e.g., for kilobyte) now uses a lower-case k to match the official SI convention. Any scripts that parse output and check for an upper-case K will need to be modified. + +v0.72 +~~~~~ + +* ceph-fuse and radosgw now use the same default values for the admin + socket and log file paths that the other daemons (ceph-osd, + ceph-mon, etc.) do. If you run these daemons as non-root, you may + need to adjust your ceph.conf to disable these options or to adjust + the permissions on /var/run/ceph and /var/log/ceph. diff --git a/ceph.spec.in b/ceph.spec.in index a60d87ad814..bcb1214cc93 100644 --- a/ceph.spec.in +++ b/ceph.spec.in @@ -239,14 +239,8 @@ License: LGPL-2.0 Requires: java Requires: libcephfs_jni1 = %{version}-%{release} BuildRequires: java-devel -%if 0%{?suse_version} > 1220 Requires: junit4 BuildRequires: junit4 -%else -Requires: junit -BuildRequires: junit -%endif -BuildRequires: junit %description -n cephfs-java This package contains the Java libraries for the Ceph File System. @@ -404,7 +398,6 @@ fi %{_bindir}/ceph-osd %{_bindir}/ceph-rbdnamer %{_bindir}/ceph-dencoder -%{_bindir}/ceph-rest-api %{_bindir}/librados-config %{_bindir}/rados %{_bindir}/rbd @@ -422,6 +415,7 @@ fi /sbin/mount.ceph %dir %{_libdir}/rados-classes %{_libdir}/rados-classes/libcls_rbd.so* +%{_libdir}/rados-classes/libcls_hello.so* %{_libdir}/rados-classes/libcls_rgw.so* %{_libdir}/rados-classes/libcls_lock.so* %{_libdir}/rados-classes/libcls_kvs.so* diff --git a/doc/index.rst b/doc/index.rst index 8bf5340b2f6..4068be599e5 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -90,6 +90,7 @@ about Ceph, see our `Architecture`_ section. :maxdepth: 1 :hidden: + start/intro start/index install/index rados/index diff --git a/doc/install/index.rst b/doc/install/index.rst index 347b6ae9ac2..3be09c5d0df 100644 --- a/doc/install/index.rst +++ b/doc/install/index.rst @@ -1,50 +1,54 @@ -============== - Installation -============== - -The Ceph Object Store is the foundation of all Ceph clusters, and it consists -primarily of two types of daemons: Object Storage Daemons (OSDs) and monitors. -The Ceph Object Store is based upon the concept of -:abbr:`RADOS (Reliable Autonomic Distributed Object Store)`, which eliminates -single points of failure and delivers infinite scalability. For details on -the architecture of Ceph and RADOS, refer to `Ceph Architecture`_. All Ceph -deployments have OSDs and monitors, so you should prepare your Ceph cluster -by focusing first on the object storage cluster. +======================= + Installation (Manual) +======================= .. raw:: html - <table cellpadding="10"><colgroup><col width="33%"><col width="33%"><col width="33%"></colgroup><tbody valign="top"><tr><td><h3>Recommendations</h3> - -To begin using Ceph in production, you should review our hardware -recommendations and operating system recommendations. Many of the -frequently-asked questions in our mailing list involve hardware-related -questions and how to install Ceph on various distributions. + <table><colgroup><col width="50%"><col width="50%"></colgroup><tbody valign="top"><tr><td><h3>Advanced Package Tool (APT)</h3> + +If you are deploying a Ceph cluster on Debian or Ubuntu distributions, +use the instructions below to install packages manually. .. 
toctree:: :maxdepth: 2 - Hardware Recommendations <hardware-recommendations> - OS Recommendations <os-recommendations> - -.. raw:: html + Installing Debian/Ubuntu Packages <debian> + Installing on Calxeda Hardware <calxeda> + Installing QEMU <qemu-deb> + Installing libvirt <libvirt-deb> - </td><td><h3>Installation</h3> +.. raw:: html -If you are deploying a Ceph cluster (that is, not developing Ceph), -install Ceph using our stable release packages. For testing, you -may install development release and testing packages. + </td><td><h3>Redhat Package Manager (RPM) / Yellowdog Updater, Modified (YUM) </h3> + +If you are deploying a Ceph cluster on Red Hat(rhel6), CentOS (el6), Fedora +17-19 (f17-f19), OpenSUSE 12 (opensuse12), and SLES (sles11) distributions, use +the instructions below to install packages manually. .. toctree:: :maxdepth: 2 - Installing Debian/Ubuntu Packages <debian> Installing RPM Packages <rpm> - Installing on Calxeda <calxeda> + Installing YUM Priorities <yum-priorities> + Installing QEMU <qemu-rpm> + Installing libvirt <libvirt-rpm> + +.. raw:: html + + </td></tr><tr><td><h3>Upgrading Ceph</h3> + +If you are upgrading Ceph from a previous release, please read the the upgrade +documentation to ensure that you follow the proper upgrade sequence. + +.. toctree:: + :maxdepth: 2 + Upgrading Ceph <upgrading-ceph> + -.. raw:: html +.. raw:: html - </td><td><h3>Building Ceph from Source</h3> + </td><td><h3>Building Ceph</h3> You can build Ceph from source by downloading a release or cloning the ``ceph`` repository at github. If you intend to build Ceph from source, please see the @@ -63,9 +67,10 @@ will save you time. Build a Package <build-packages> Contributing Code <contributing> +See the `Development`_ section for additional development details. .. raw:: html </td></tr></tbody></table> - -.. _Ceph Architecture: ../architecture/ + +.. _Development: ../../dev
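For the Debian/Ubuntu path referenced above, a minimal manual-install sketch looks like the following; the release name ``dumpling`` and the codename returned by ``lsb_release -sc`` are examples, so substitute the release and distribution you actually want::

    # add the Ceph release key and APT repository (release name is an example)
    wget -q -O- 'https://ceph.com/git/?p=ceph.git;a=blob_plain;f=keys/release.asc' | sudo apt-key add -
    echo deb http://ceph.com/debian-dumpling/ $(lsb_release -sc) main | sudo tee /etc/apt/sources.list.d/ceph.list

    # install the packages
    sudo apt-get update && sudo apt-get install ceph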
\ No newline at end of file diff --git a/doc/install/libvirt-deb.rst b/doc/install/libvirt-deb.rst new file mode 100644 index 00000000000..9365e46c747 --- /dev/null +++ b/doc/install/libvirt-deb.rst @@ -0,0 +1,43 @@ +==================== + Installing libvirt +==================== + + +Prerequisites +============= + +- `Install`_ and `configure`_ a Ceph Storage Cluster +- `Install and configure`_ QEMU/KVM + + +Installing ``libvirt`` on Ubuntu 12.04 Precise +============================================== + +``libvirt`` packages are incorporated into the Ubuntu 12.04 precise +distribution. To install ``libvirt`` on precise, execute the following:: + + sudo apt-get update && sudo apt-get install libvirt-bin + + +Installing ``libvirt`` on Earlier Versions of Ubuntu +==================================================== + +For Ubuntu distributions 11.10 oneiric and earlier, you must build ``libvirt`` +from source. Clone the ``libvirt`` repository, and use `AutoGen`_ to generate +the build. Then, execute ``make`` and ``make install`` to complete the +installation. For example:: + + git clone git://libvirt.org/libvirt.git + cd libvirt + ./autogen.sh + make + sudo make install + +See `libvirt Installation`_ for details. + + +.. _libvirt Installation: http://www.libvirt.org/compiling.html +.. _AutoGen: http://www.gnu.org/software/autogen/ +.. _Install: ../index +.. _configure: ../../rados/configuration +.. _Install and configure: ../../rbd/qemu-rbd diff --git a/doc/install/libvirt-rpm.rst b/doc/install/libvirt-rpm.rst new file mode 100644 index 00000000000..a94c6e8ae12 --- /dev/null +++ b/doc/install/libvirt-rpm.rst @@ -0,0 +1,19 @@ +==================== + Installing libvirt +==================== + +To use ``libvirt`` with a Ceph Storage Cluster, you must +have a running Ceph Storage Cluster. You must also install QEMU. +See `Installing QEMU`_ for details. + + +``libvirt`` packages are incorporated into the recent CentOS/RHEL distributions. +To install ``libvirt``, execute the following:: + + sudo yum install libvirt + +See `libvirt Installation`_ for details. + + +.. _libvirt Installation: http://www.libvirt.org/compiling.html +.. _Installing QEMU: ../qemu-rpm
\ No newline at end of file diff --git a/doc/install/qemu-deb.rst b/doc/install/qemu-deb.rst new file mode 100644 index 00000000000..29abeafa3bc --- /dev/null +++ b/doc/install/qemu-deb.rst @@ -0,0 +1,26 @@ +================= + Installing QEMU +================= + + + +Installing QEMU (12.04 Precise and later) +========================================= + +QEMU packages are incorporated into Ubuntu 12.04 Precise Pangolin and later +versions. To install QEMU, execute the following:: + + sudo apt-get install qemu + +Installing QEMU (11.10 Oneric and earlier) +========================================== + +For Ubuntu distributions 11.10 Oneiric and earlier, you must install +the 0.15 version of QEMU or later. To build QEMU from source, use the +following procedure:: + + cd {your-development-directory} + git clone git://git.qemu.org/qemu.git + cd qemu + ./configure --enable-rbd + make; make install diff --git a/doc/install/qemu-rpm.rst b/doc/install/qemu-rpm.rst new file mode 100644 index 00000000000..67da2c3714c --- /dev/null +++ b/doc/install/qemu-rpm.rst @@ -0,0 +1,56 @@ +================= + Installing QEMU +================= + +To install QEMU with ``yum``, you must ensure that you have +``yum-plugin-priorities`` installed. See `Installing YUM Priorities`_ +for details. + +To install QEMU, execute the following: + +#. Create a ``/etc/yum.repos.d/ceph-qemu.conf`` file with the following + contents:: + + [ceph-qemu] + name=Ceph Packages for QEMU + baseurl=http://ceph.com/packages/ceph-extras/rpm/centos6.3/$basearch + enabled=1 + priority=2 + gpgcheck=1 + type=rpm-md + gpgkey=https://ceph.com/git/?p=ceph.git;a=blob_plain;f=keys/release.asc + + [ceph-qemu-noarch] + name=Ceph QEMU noarch + baseurl=http://ceph.com/packages/ceph-extras/rpm/centos6.3/noarch + enabled=1 + priority=2 + gpgcheck=1 + type=rpm-md + gpgkey=https://ceph.com/git/?p=ceph.git;a=blob_plain;f=keys/release.asc + + [ceph-qemu-source] + name=Ceph QEMU Sources + baseurl=http://ceph.com/packages/ceph-extras/rpm/centos6.3/SRPMS + enabled=1 + priority=2 + gpgcheck=1 + type=rpm-md + gpgkey=https://ceph.com/git/?p=ceph.git;a=blob_plain;f=keys/release.asc + +#. Update your repositories. :: + + sudo yum update + +#. Install QEMU for Ceph. :: + + sudo yum install qemu-kvm qemu-kvm-tools qemu-img + +#. Install additional QEMU packages (optional):: + + sudo yum install qemu-guest-agent qemu-guest-agent-win32 + +See `QEMU and Block Devices`_ for usage. + +.. _QEMU and Block Devices: ../../rbd/qemu-rbd +.. _Installing YUM Priorities: ../yum-priorities
\ No newline at end of file diff --git a/doc/install/rpm.rst b/doc/install/rpm.rst index ea96d394c7a..9e8cdcd003c 100644 --- a/doc/install/rpm.rst +++ b/doc/install/rpm.rst @@ -7,6 +7,7 @@ development release packages (for the latest features), or development testing packages (for development and QA only). Do not add multiple package sources at the same time. + Install Release Key =================== @@ -139,142 +140,54 @@ You can download the RPMs directly from:: -Installing Ceph Deploy -====================== - -Once you have added either release or development packages to ``yum``, you -can install ``ceph-deploy``. :: - - sudo yum install ceph-deploy python-pushy - - - -Installing Ceph Packages -======================== - -Once you have added either release or development packages to ``yum``, you -can install Ceph packages. You can also use ``ceph-deploy`` to install Ceph -packages. :: - - sudo yum install ceph - - - -Installing Ceph Object Storage -============================== - -:term:`Ceph Object Storage` runs on Apache and FastCGI in conjunction with the -:term:`Ceph Storage Cluster`. - -#. Install Apache and FastCGI. :: - - rpm -ivh fcgi-2.4.0-10.el6.x86_64.rpm - rpm -ivh mod_fastcgi-2.4.6-2.el6.rf.x86_64.rpm - - -#. Install the Ceph Object Storage daemon. :: +Adding Ceph to YUM +================== - yum install ceph-radosgw +You may also add Ceph to the ``/etc/yum.repos.d`` directory. Create a +``ceph.repo`` file. In the example below, replace ``{ceph-stable}`` with +a stable release of Ceph (e.g., ``cuttlefish``, ``dumpling``, etc.) and +``{distro}`` with your Linux distribution (e.g., ``el6``, ``rhel6``, etc.). :: + [ceph] + name=Ceph packages for $basearch + baseurl=http://ceph.com/rpm-{ceph-stable}/{distro}/$basearch + enabled=1 + gpgcheck=1 + type=rpm-md + gpgkey=https://ceph.com/git/?p=ceph.git;a=blob_plain;f=keys/release.asc -#. Add the following lines to your Ceph configuration file. + [ceph-noarch] + name=Ceph noarch packages + baseurl=http://ceph.com/rpm-{ceph-stable}/{distro}/noarch + enabled=1 + gpgcheck=1 + type=rpm-md + gpgkey=https://ceph.com/git/?p=ceph.git;a=blob_plain;f=keys/release.asc -.. code-block:: ini + [ceph-source] + name=Ceph source packages + baseurl=http://ceph.com/rpm-{ceph-stable}/{distro}/SRPMS + enabled=0 + gpgcheck=1 + type=rpm-md + gpgkey=https://ceph.com/git/?p=ceph.git;a=blob_plain;f=keys/release.asc - [client.radosgw.gateway] - host = {fqdn} - keyring = /etc/ceph/keyring.radosgw.gateway - rgw socket path = /tmp/radosgw.sock - log file = /var/log/ceph/radosgw.log - rgw print continue = false - -.. note:: Replace ``{fqdn}`` with the output from ``hostname``. This is - important. Debian systems use the simple hostname, but on CentOS 6/RHEL 6 - you must use the fully qualified domain name. - -#. Create a data directory. :: - - mkdir -p /var/lib/ceph/radosgw/ceph-radosgw.gateway - - -#. Change ``httpd ServerName`` in ``/etc/httpd/conf/httpd.conf``. :: - - ServerName {FQDN} - - -#. Create an Apache httpd virtual host in ``/etc/httpd/conf.d/rgw.conf``. 
:: - - FastCgiExternalServer /var/www/s3gw.fcgi -socket /tmp/radosgw.sock - <VirtualHost *:80> - ServerName <FQDN of the host> - ServerAdmin root@localhost - DocumentRoot /var/www - RewriteEngine On - RewriteRule ^/([a-zA-Z0-9-_.]*)([/]?.*) /s3gw.fcgi?page=$1¶ms=$2&%{QUERY_STRING} [E=HTTP_AUTHORIZATION:%{HTTP:Authorization},L] - <IfModule mod_fastcgi.c> - <Directory /var/www> - Options +ExecCGI - AllowOverride All - SetHandler fastcgi-script - Order allow,deny - Allow from all - AuthBasicAuthoritative Off - </Directory> - </IfModule> - AllowEncodedSlashes On - ErrorLog /var/log/httpd/error.log - CustomLog /var/log/httpd/access.log combined - ServerSignature Off - </VirtualHost> - -#. Turn off ``fastcgiwrapper`` in ``/etc/httpd/conf.d/fastcgi.conf`` by - commenting out the following line:: - - #FastCgiWrapper On - - -#. Add a ``fastcgi`` script with the following path ``/var/www/s3gw.fcgi``. :: - - #!/bin/sh - exec /usr/bin/radosgw -c /etc/ceph/ceph.conf -n client.radosgw.gateway - - -#. Make ``s3gw.fcgi`` executable:: - - chmod +x /var/www/s3gw.fcgi - - -#. Create a user key. :: - - ceph-authtool -C -n client.radosgw.gateway --gen-key /etc/ceph/keyring.radosgw.gateway - ceph-authtool -n client.radosgw.gateway --cap mon 'allow rw' --cap osd 'allow rwx' /etc/ceph/keyring.radosgw.gateway - ceph auth add client.radosgw.gateway --in-file=/etc/ceph/keyring.radosgw.gateway - - -#. Please make sure ``/etc/ceph/keyring.radosgw.gateway`` file and - ``/var/log/ceph/radosgw.log`` are accessible by the ``apache`` user. :: - - sudo chown apache:apache /etc/ceph/keyring.radosgw.gateway - sudo chown apache:apache /var/log/ceph/radosgw.log - -.. note:: This is important. The user is ``root`` for Debian. +Installing Ceph Deploy +====================== -#. Create ``.rgw.buckets`` and add it to the Ceph Object Storage daemon. :: +Once you have added either release or development packages, or added a +``ceph.repo`` file to ``/etc/yum.repos.d``, you can install ``ceph-deploy``. :: - rados mkpool .rgw.buckets - radosgw-admin pool add --pool .rgw.buckets + sudo yum install ceph-deploy python-pushy -#. Configure Apache and the Ceph Object Storage daemon to start on boot. :: - chkconfig httpd on - chkconfig ceph-radosgw on +Installing Ceph Packages +======================== -#. Start the services. :: +Once you have added either release or development packages, or added a +``ceph.repo`` file to ``/etc/yum.repos.d``, you can install Ceph packages. :: - /etc/init.d/httpd start - /etc/init.d/ceph-radosgw start - -See `Ceph Object Storage`_ for additional details. + sudo yum install ceph -.. _Ceph Object Storage: ../../radosgw +.. note:: You can also use ``ceph-deploy`` to install Ceph packages. diff --git a/doc/install/yum-priorities.rst b/doc/install/yum-priorities.rst new file mode 100644 index 00000000000..e4adb72b7dd --- /dev/null +++ b/doc/install/yum-priorities.rst @@ -0,0 +1,20 @@ +=========================== + Installing YUM Priorities +=========================== + +Ceph builds packages for Apache and FastCGI (for 100-continue support) and +QEMU (for ``rbd`` support). You must set priorities in your ``.repo`` +files to ensure that ``yum`` installs the Ceph packages instead of the +standard packages. The ``priorities`` setting requires you to install +and enable ``yum-plugin-priorities``. + +#. Install ``yum-plugin-priorities``. :: + + sudo yum install yum-plugin-priorities + +#. Ensure ``/etc/yum/pluginconf.d/priorities.conf`` exists. :: + +#. Ensure ``priorities.conf`` enables the plugin. 
:: + + [main] + enabled = 1 diff --git a/doc/rados/operations/add-or-rm-mons.rst b/doc/rados/operations/add-or-rm-mons.rst index 17ae9d86b85..e3bac1fca09 100644 --- a/doc/rados/operations/add-or-rm-mons.rst +++ b/doc/rados/operations/add-or-rm-mons.rst @@ -32,7 +32,7 @@ version of Linux installed (typically Ubuntu 12.04 precise). Add your monitor host to a rack in your cluster, connect it to the network and ensure that it has network connectivity. -.. _Hardware Recommendations: ../../install/hardware-recommendations +.. _Hardware Recommendations: ../../../start/hardware-recommendations Install the Required Software ----------------------------- @@ -42,17 +42,9 @@ manually. See `Installing Debian/Ubuntu Packages`_ for details. You should configure SSH to a user with password-less authentication and root permissions. -.. _Installing Debian/Ubuntu Packages: ../../install/debian +.. _Installing Debian/Ubuntu Packages: ../../../install/debian -For clusters deployed with Chef, create a `chef user`_, `configure -SSH keys`_, `install Ruby`_ and `install the Chef client`_ on your host. See -`Installing Chef`_ for details. -.. _chef user: ../../install/chef#createuser -.. _configure SSH keys: ../../install/chef#genkeys -.. _install the Chef client: ../../install/chef#installchef -.. _Installing Chef: ../../install/chef -.. _install Ruby: ../../install/chef#installruby .. _Adding a Monitor (Manual): diff --git a/doc/rados/operations/authentication.rst b/doc/rados/operations/authentication.rst index 6bacf4c7dff..d9995da8fb8 100644 --- a/doc/rados/operations/authentication.rst +++ b/doc/rados/operations/authentication.rst @@ -154,6 +154,7 @@ during setup and/or troubleshooting to temporarily disable authentication. auth cluster required = none auth service required = none auth client required = none + auth supported = none #. Or, disable ``cephx`` authentication for versions ``0.50`` and below (deprecated as of version 0.51) by setting the following option in the diff --git a/doc/rados/operations/operating.rst b/doc/rados/operations/operating.rst index 9942ea3cabf..8c62ed5cdbf 100644 --- a/doc/rados/operations/operating.rst +++ b/doc/rados/operations/operating.rst @@ -7,11 +7,10 @@ Running Ceph with Upstart ========================= -When deploying Ceph Cuttlefish and beyond with ``ceph-deploy``, you may start -and stop Ceph daemons on a :term:`Ceph Node` using the event-based `Upstart`_. -Upstart does not require you to define daemon instances in the Ceph configuration -file (although, they are still required for ``sysvinit`` should you choose to -use it). +When deploying Ceph Cuttlefish and beyond with ``ceph-deploy`` on Debian/Ubuntu +distributions, you may start and stop Ceph daemons on a :term:`Ceph Node` using +the event-based `Upstart`_. Upstart does not require you to define daemon +instances in the Ceph configuration file. To list the Ceph Upstart jobs and instances on a node, execute:: @@ -19,6 +18,7 @@ To list the Ceph Upstart jobs and instances on a node, execute:: See `initctl`_ for additional details. + Starting all Daemons -------------------- @@ -93,29 +93,20 @@ For example:: sudo start ceph-mds id=ceph-server - .. index:: Ceph service; sysvinit; operating a cluster -Running Ceph as a Service -========================= +Running Ceph +============ -When you deploy Ceph Argonaut or Bobtail with ``mkcephfs``, use the -service or traditional sysvinit. 
+Each time you to **start**, **restart**, and **stop** Ceph daemons (or your +entire cluster) you must specify at least one option and one command. You may +also specify a daemon type or a daemon instance. :: -The ``ceph`` service provides functionality to **start**, **restart**, and -**stop** your Ceph cluster. Each time you execute ``ceph`` processes, you -must specify at least one option and one command. You may also specify a daemon -type or a daemon instance. For most newer Debian/Ubuntu distributions, you may -use the following syntax:: + {commandline} [options] [commands] [daemons] - sudo service ceph [options] [commands] [daemons] -For older distributions, you may wish to use the ``/etc/init.d/ceph`` path:: - - sudo /etc/init.d/ceph [options] [commands] [daemons] - -The ``ceph`` service options include: +The ``ceph`` options include: +-----------------+----------+-------------------------------------------------+ | Option | Shortcut | Description | @@ -134,7 +125,7 @@ The ``ceph`` service options include: | ``--conf`` | ``-c`` | Use an alternate configuration file. | +-----------------+----------+-------------------------------------------------+ -The ``ceph`` service commands include: +The ``ceph`` commands include: +------------------+------------------------------------------------------------+ | Command | Description | @@ -152,83 +143,213 @@ The ``ceph`` service commands include: | ``cleanalllogs`` | Cleans out **everything** in the log directory. | +------------------+------------------------------------------------------------+ -For subsystem operations, the ``ceph`` service can target specific daemon types by -adding a particular daemon type for the ``[daemons]`` option. Daemon types include: +For subsystem operations, the ``ceph`` service can target specific daemon types +by adding a particular daemon type for the ``[daemons]`` option. Daemon types +include: - ``mon`` - ``osd`` - ``mds`` -The ``ceph`` service's ``[daemons]`` setting may also target a specific instance. -To start a Ceph daemon on the local :term:`Ceph Node`, use the following syntax:: - sudo /etc/init.d/ceph start osd.0 +Running Ceph with sysvinit +-------------------------- -To start a Ceph daemon on another node, use the following syntax:: - - sudo /etc/init.d/ceph -a start osd.0 +Using traditional ``sysvinit`` is the recommended way to run Ceph with CentOS, +Red Hat, Fedora, and SLES distributions. You may also use it for older +distributions of Debian/Ubuntu. -Where ``osd.0`` is the first OSD in the cluster. - -Starting a Cluster ------------------- +Starting all Daemons +~~~~~~~~~~~~~~~~~~~~ To start your Ceph cluster, execute ``ceph`` with the ``start`` command. -The usage may differ based upon your Linux distribution. For example, for most -newer Debian/Ubuntu distributions, you may use the following syntax:: - - sudo service ceph [options] [start|restart] [daemonType|daemonID] - -For older distributions, you may wish to use the ``/etc/init.d/ceph`` path:: +Use the following syntax:: sudo /etc/init.d/ceph [options] [start|restart] [daemonType|daemonID] The following examples illustrates a typical use case:: - sudo service ceph -a start sudo /etc/init.d/ceph -a start Once you execute with ``-a`` (i.e., execute on all nodes), Ceph should begin -operating. You may also specify a particular daemon instance to constrain the -command to a single instance. To start a Ceph daemon on the local Ceph Node, -use the following syntax:: +operating. 
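To confirm that the daemons actually came up after a start, a minimal check (a sketch, assuming the ``ceph`` CLI and the ``sysvinit`` script's ``status`` command are available on the node)::

    sudo /etc/init.d/ceph status     # status of the daemons on the local node
    ceph -s                          # overall cluster status once the monitors are up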
+ + +Stopping all Daemons +~~~~~~~~~~~~~~~~~~~~ + +To stop your Ceph cluster, execute ``ceph`` with the ``stop`` command. +Use the following syntax:: + + sudo /etc/init.d/ceph [options] stop [daemonType|daemonID] + +The following examples illustrates a typical use case:: + + sudo /etc/init.d/ceph -a stop +Once you execute with ``-a`` (i.e., execute on all nodes), Ceph should stop +operating. + + +Starting all Daemons by Type +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To start all Ceph daemons of a particular type on the local Ceph Node, use the +following syntax:: + + sudo /etc/init.d/ceph start {daemon-type} + sudo /etc/init.d/ceph start osd + +To start all Ceph daemons of a particular type on another node, use the +following syntax:: + + sudo /etc/init.d/ceph -a start {daemon-type} + sudo /etc/init.d/ceph -a start osd + + +Stopping all Daemons by Type +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To stop all Ceph daemons of a particular type on the local Ceph Node, use the +following syntax:: + + sudo /etc/init.d/ceph stop {daemon-type} + sudo /etc/init.d/ceph stop osd + +To stop all Ceph daemons of a particular type on another node, use the +following syntax:: + + sudo /etc/init.d/ceph -a stop {daemon-type} + sudo /etc/init.d/ceph -a stop osd + + +Starting a Daemon +~~~~~~~~~~~~~~~~~ + +To start a Ceph daemon on the local Ceph Node, use the following syntax:: + + sudo /etc/init.d/ceph start {daemon-type}.{instance} sudo /etc/init.d/ceph start osd.0 To start a Ceph daemon on another node, use the following syntax:: + sudo /etc/init.d/ceph -a start {daemon-type}.{instance} sudo /etc/init.d/ceph -a start osd.0 -Stopping a Cluster ------------------- +Stopping a Daemon +~~~~~~~~~~~~~~~~~ + +To stop a Ceph daemon on the local Ceph Node, use the following syntax:: + + sudo /etc/init.d/ceph stop {daemon-type}.{instance} + sudo /etc/init.d/ceph stop osd.0 + +To stop a Ceph daemon on another node, use the following syntax:: + + sudo /etc/init.d/ceph -a stop {daemon-type}.{instance} + sudo /etc/init.d/ceph -a stop osd.0 + + +Running Ceph as a Service +------------------------- + +When you deploy Ceph Argonaut or Bobtail with ``mkcephfs``, you operate +Ceph as a service (you may also use sysvinit). + + +Starting all Daemons +~~~~~~~~~~~~~~~~~~~~ + +To start your Ceph cluster, execute ``ceph`` with the ``start`` command. +Use the following syntax:: + + sudo service ceph [options] [start|restart] [daemonType|daemonID] + +The following examples illustrates a typical use case:: + + sudo service ceph -a start + +Once you execute with ``-a`` (i.e., execute on all nodes), Ceph should begin +operating. + + +Stopping all Daemons +~~~~~~~~~~~~~~~~~~~~ To stop your Ceph cluster, execute ``ceph`` with the ``stop`` command. -The usage may differ based upon your Linux distribution. For example, for most -newer Debian/Ubuntu distributions, you may use the following syntax:: +Use the following syntax:: sudo service ceph [options] stop [daemonType|daemonID] For example:: - sudo service ceph -a stop - -For older distributions, you may wish to use the ``/etc/init.d/ceph`` path:: - - sudo /etc/init.d/ceph -a stop + sudo service ceph -a stop Once you execute with ``-a`` (i.e., execute on all nodes), Ceph should shut -down. You may also specify a particular daemon instance to constrain the -command to a single instance. To stop a Ceph daemon on the local Ceph Node, -use the following syntax:: +down. 
+ + +Starting all Daemons by Type +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To start all Ceph daemons of a particular type on the local Ceph Node, use the +following syntax:: + + sudo service ceph start {daemon-type} + sudo service ceph start osd + +To start all Ceph daemons of a particular type on all nodes, use the following +syntax:: + + sudo service ceph -a start {daemon-type} + sudo service ceph -a start osd + + +Stopping all Daemons by Type +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To stop all Ceph daemons of a particular type on the local Ceph Node, use the +following syntax:: + + sudo service ceph stop {daemon-type} + sudo service ceph stop osd + +To stop all Ceph daemons of a particular type on all nodes, use the following +syntax:: + + sudo service ceph -a stop {daemon-type} + sudo service ceph -a stop osd - sudo /etc/init.d/ceph stop osd.0 + +Starting a Daemon +~~~~~~~~~~~~~~~~~ + +To start a Ceph daemon on the local Ceph Node, use the following syntax:: + + sudo service ceph start {daemon-type}.{instance} + sudo service ceph start osd.0 + +To start a Ceph daemon on another node, use the following syntax:: + + sudo service ceph -a start {daemon-type}.{instance} + sudo service ceph -a start osd.0 + + +Stopping a Daemon +~~~~~~~~~~~~~~~~~ + +To stop a Ceph daemon on the local Ceph Node, use the following syntax:: + + sudo service ceph stop {daemon-type}.{instance} + sudo service ceph stop osd.0 To stop a Ceph daemon on another node, use the following syntax:: - sudo /etc/init.d/ceph -a stop osd.0 + sudo service ceph -a stop {daemon-type}.{instance} + sudo service ceph -a stop osd.0 diff --git a/doc/radosgw/config.rst b/doc/radosgw/config.rst index 684a50649ec..caa3dac15e1 100644 --- a/doc/radosgw/config.rst +++ b/doc/radosgw/config.rst @@ -387,6 +387,7 @@ The following configuration options are available for Keystone integration:: rgw keystone accepted roles = {accepted user roles} rgw keystone token cache size = {number of tokens to cache} rgw keystone revocation interval = {number of seconds before checking revoked tickets} + rgw s3 auth use keystone = true nss db path = {path to nss db} A Ceph Object Gateway user is mapped into a Keystone ``tenant``. A Keystone user diff --git a/doc/rbd/libvirt.rst b/doc/rbd/libvirt.rst index cc8dc9bd189..4813c3258d0 100644 --- a/doc/rbd/libvirt.rst +++ b/doc/rbd/libvirt.rst @@ -40,46 +40,11 @@ The most common ``libvirt`` use case involves providing Ceph block devices to cloud solutions like OpenStack or CloudStack. The cloud solution uses ``libvirt`` to interact with QEMU/KVM, and QEMU/KVM interacts with Ceph block devices via ``librbd``. See `Block Devices and OpenStack`_ and `Block Devices -and CloudStack`_ for details. +and CloudStack`_ for details. See `Installation`_ for installation details. You can also use Ceph block devices with ``libvirt``, ``virsh`` and the ``libvirt`` API. See `libvirt Virtualization API`_ for details. -Prerequisites -============= - -- `Install`_ and `configure`_ a Ceph cluster -- `Install and configure`_ QEMU/KVM - - -Installing ``libvirt`` on Ubuntu 12.04 Precise -============================================== - -``libvirt`` packages are incorporated into the Ubuntu 12.04 precise -distribution. To install ``libvirt`` on precise, execute the following:: - - sudo apt-get update && sudo apt-get install libvirt-bin - - -Installing ``libvirt`` on Earlier Versions of Ubuntu -==================================================== - -For Ubuntu distributions 11.10 oneiric and earlier, you must build ``libvirt`` -from source. 
Clone the ``libvirt`` repository, and use `AutoGen`_ to generate -the build. Then, execute ``make`` and ``make install`` to complete the -installation. For example:: - - git clone git://libvirt.org/libvirt.git - cd libvirt - ./autogen.sh - make - sudo make install - -See `libvirt Installation`_ for details. - - -Using Ceph with Virtual Machines -================================ To create VMs that use Ceph block devices, use the procedures in the following sections. In the exemplary embodiment, we've used ``libvirt-pool`` for the pool @@ -89,7 +54,7 @@ when executing commands in the subsequent procedures. Configuring Ceph ----------------- +================ To configure Ceph for use with ``libvirt``, perform the following steps: @@ -132,7 +97,7 @@ To configure Ceph for use with ``libvirt``, perform the following steps: Preparing the VM Manager ------------------------- +======================== You may use ``libvirt`` without a VM manager, but you may find it simpler to create your first domain with ``virt-manager``. @@ -150,7 +115,7 @@ create your first domain with ``virt-manager``. Creating a VM -------------- +============= To create a VM with ``virt-manager``, perform the following steps: @@ -182,7 +147,7 @@ To create a VM with ``virt-manager``, perform the following steps: Configuring the VM ------------------- +================== When configuring the VM for use with Ceph, it is important to use ``virsh`` where appropriate. Additionally, ``virsh`` commands often require root @@ -290,7 +255,7 @@ commands, refer to `Virsh Command Reference`_. Summary -------- +======= Once you have configured the VM for use with Ceph, you can start the VM. To verify that the VM and Ceph are communicating, you may perform the @@ -320,13 +285,8 @@ If everything looks okay, you may begin using the Ceph block device within your VM. - -.. _AutoGen: http://www.gnu.org/software/autogen/ -.. _libvirt Installation: http://www.libvirt.org/compiling.html +.. _Installation: ../../install .. _libvirt Virtualization API: http://www.libvirt.org -.. _Install: ../../install -.. _configure: ../../rados/configuration -.. _Install and configure: ../qemu-rbd .. _Block Devices and OpenStack: ../rbd-openstack .. _Block Devices and CloudStack: ../rbd-cloudstack .. _Create a pool: ../../rados/operations/pools#create-a-pool diff --git a/doc/rbd/qemu-rbd.rst b/doc/rbd/qemu-rbd.rst index 9d366f3ea8d..e0b55dee257 100644 --- a/doc/rbd/qemu-rbd.rst +++ b/doc/rbd/qemu-rbd.rst @@ -27,33 +27,12 @@ image each time it spins up a new virtual machine. Ceph Block Devices can integrate with the QEMU virtual machine. For details on QEMU, see `QEMU Open Source Processor Emulator`_. For QEMU documentation, see -`QEMU Manual`_. +`QEMU Manual`_. For installation details, see `Installation`_. .. important:: To use Ceph Block Devices with QEMU, you must have access to a running Ceph cluster. -Installing QEMU (12.04 Precise and later) -========================================= - -QEMU packages are incorporated into Ubuntu 12.04 Precise Pangolin and later -versions. To install QEMU, execute the following:: - - sudo apt-get install qemu - -Installing QEMU (11.10 Oneric and earlier) -========================================== - -For Ubuntu distributions 11.10 Oneiric and earlier, you must install -the 0.15 version of QEMU or later. 
To build QEMU from source, use the -following procedure:: - - cd {your-development-directory} - git clone git://git.qemu.org/qemu.git - cd qemu - ./configure --enable-rbd - make; make install - Creating Images with QEMU ========================= @@ -199,4 +178,5 @@ QEMU command line settings override the Ceph configuration file settings. .. _QEMU Open Source Processor Emulator: http://wiki.qemu.org/Main_Page .. _QEMU Manual: http://wiki.qemu.org/Manual .. _RBD Cache: ../rbd-config-ref/ -.. _Snapshots: ../rbd-snapshot/
\ No newline at end of file +.. _Snapshots: ../rbd-snapshot/ +.. _Installation: ../../install
\ No newline at end of file diff --git a/doc/rbd/rbd-openstack.rst b/doc/rbd/rbd-openstack.rst index 660757639aa..80dd43ce406 100644 --- a/doc/rbd/rbd-openstack.rst +++ b/doc/rbd/rbd-openstack.rst @@ -127,7 +127,7 @@ Hosts running ``nova-compute`` do not need the keyring. Instead, they store the secret key in libvirt. Create a temporary copy of the secret key on the hosts running ``nova-compute``:: - ssh {your-compute-host} client.volumes.key <`ceph auth get-key client.volumes` + ceph auth get-key client.volumes | ssh {your-compute-host} tee client.volumes.key Then, on the compute hosts, add the secret key to libvirt and remove the temporary copy of the key:: diff --git a/doc/install/hardware-recommendations.rst b/doc/start/hardware-recommendations.rst index 90d29e5e7e2..90d29e5e7e2 100644 --- a/doc/install/hardware-recommendations.rst +++ b/doc/start/hardware-recommendations.rst diff --git a/doc/start/index.rst b/doc/start/index.rst index 2fc03c0a284..6e9277746d9 100644 --- a/doc/start/index.rst +++ b/doc/start/index.rst @@ -1,34 +1,6 @@ -================= - Getting Started -================= - -Whether you want to provide :term:`Ceph Object Storage` and/or :term:`Ceph Block -Device` services to :term:`Cloud Platforms`, deploy a :term:`Ceph Filesystem` or -use Ceph for another purpose, all :term:`Ceph Storage Cluster` deployments begin -with setting up each :term:`Ceph Node`, your network and the Ceph Storage -Cluster. A Ceph Storage Cluster has three essential daemons: - -.. ditaa:: +---------------+ +---------------+ +---------------+ - | OSDs | | Monitor | | MDS | - +---------------+ +---------------+ +---------------+ - -- **OSDs**: A :term:`Ceph OSD Daemon` (OSD) stores data, handles data - replication, recovery, backfilling, rebalancing, and provides some monitoring - information to Ceph Monitors by checking other Ceph OSD Daemons for a - heartbeat. A Ceph Storage Cluster requires at least two Ceph OSD Daemons to - achieve an ``active + clean`` state. - -- **Monitors**: A :term:`Ceph Monitor` maintains maps of the cluster state, - including the monitor map, the OSD map, the Placement Group (PG) map, and the - CRUSH map. Ceph maintains a history (called an "epoch") of each state change - in the Ceph Monitors, Ceph OSD Daemons, and PGs. - -- **MDSs**: A :term:`Ceph Metadata Server` (MDS) stores metadata on behalf of - the :term:`Ceph Filesystem` (i.e., Ceph Block Devices and Ceph Object Storage - do not use MDS). Ceph Metadata Servers make it feasible for POSIX file system - users to execute basic commands like ``ls``, ``find``, etc. without placing - an enormous burden on the Ceph Storage Cluster. - +====================== + Installation (Quick) +====================== .. raw:: html @@ -37,18 +9,17 @@ Cluster. A Ceph Storage Cluster has three essential daemons: A :term:`Ceph Client` and a :term:`Ceph Node` may require some basic configuration work prior to deploying a Ceph Storage Cluster. You can also -avail yourself of help from the Ceph community by getting involved. +avail yourself of help by getting involved in the Ceph community. .. toctree:: - Get Involved <get-involved> Preflight <quick-start-preflight> .. raw:: html </td><td><h3>Step 2: Storage Cluster</h3> -Once you've completed your preflight checklist, you should be able to begin +Once you've completed your preflight checklist, you should be able to begin deploying a Ceph Storage Cluster. .. 
toctree:: diff --git a/doc/start/intro.rst b/doc/start/intro.rst new file mode 100644 index 00000000000..704ff1e8cd5 --- /dev/null +++ b/doc/start/intro.rst @@ -0,0 +1,70 @@ +=============== + Intro to Ceph +=============== + +Whether you want to provide :term:`Ceph Object Storage` and/or :term:`Ceph Block +Device` services to :term:`Cloud Platforms`, deploy a :term:`Ceph Filesystem` or +use Ceph for another purpose, all :term:`Ceph Storage Cluster` deployments begin +with setting up each :term:`Ceph Node`, your network and the Ceph Storage +Cluster. A Ceph Storage Cluster requires at least one Ceph Monitor and at least +two Ceph OSD Daemons. The Ceph Metadata Server is essential when running Ceph +Filesystem clients. + +.. ditaa:: +---------------+ +---------------+ +---------------+ + | OSDs | | Monitor | | MDS | + +---------------+ +---------------+ +---------------+ + +- **OSDs**: A :term:`Ceph OSD Daemon` (OSD) stores data, handles data + replication, recovery, backfilling, rebalancing, and provides some monitoring + information to Ceph Monitors by checking other Ceph OSD Daemons for a + heartbeat. A Ceph Storage Cluster requires at least two Ceph OSD Daemons to + achieve an ``active + clean`` state when the cluster makes two copies of your + data (Ceph makes 2 copies by default, but you can adjust it). + +- **Monitors**: A :term:`Ceph Monitor` maintains maps of the cluster state, + including the monitor map, the OSD map, the Placement Group (PG) map, and the + CRUSH map. Ceph maintains a history (called an "epoch") of each state change + in the Ceph Monitors, Ceph OSD Daemons, and PGs. + +- **MDSs**: A :term:`Ceph Metadata Server` (MDS) stores metadata on behalf of + the :term:`Ceph Filesystem` (i.e., Ceph Block Devices and Ceph Object Storage + do not use MDS). Ceph Metadata Servers make it feasible for POSIX file system + users to execute basic commands like ``ls``, ``find``, etc. without placing + an enormous burden on the Ceph Storage Cluster. + +Ceph stores a client's data as objects within storage pools. Using the CRUSH +algorithm, Ceph calculates which placement group should contain the object, +and further calculates which Ceph OSD Daemon should store the placement group. +The CRUSH algorithm enables the Ceph Storage Cluster to scale, rebalance, and +recover dynamically. + + +.. raw:: html + + <style type="text/css">div.body h3{margin:5px 0px 0px 0px;}</style> + <table cellpadding="10"><colgroup><col width="50%"><col width="50%"></colgroup><tbody valign="top"><tr><td><h3>Recommendations</h3> + +To begin using Ceph in production, you should review our hardware +recommendations and operating system recommendations. + +.. toctree:: + :maxdepth: 2 + + Hardware Recommendations <hardware-recommendations> + OS Recommendations <os-recommendations> + + +.. raw:: html + + </td><td><h3>Get Involved</h3> + + You can avail yourself of help or contribute documentation, source + code or bugs by getting involved in the Ceph community. + +.. toctree:: + + get-involved + +.. raw:: html + + </td></tr></tbody></table> diff --git a/doc/install/os-recommendations.rst b/doc/start/os-recommendations.rst index 71a4d3a278b..d8b418fe1b0 100644 --- a/doc/install/os-recommendations.rst +++ b/doc/start/os-recommendations.rst @@ -36,6 +36,36 @@ platforms. Generally speaking, there is very little dependence on specific distributions aside from the kernel and system initialization package (i.e., sysvinit, upstart, systemd). 
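To compare a host against the platform tables that follow, it is usually enough to check the distribution release and the running kernel (a sketch; ``lsb_release`` may require the ``lsb-release`` or ``redhat-lsb`` package)::

    lsb_release -a    # distribution, release number and codename
    uname -r          # running kernel version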
+ +Dumpling (0.67) +--------------- + ++----------+----------+--------------------+--------------+---------+------------+ +| Distro | Release | Code Name | Kernel | Notes | Testing | ++==========+==========+====================+==============+=========+============+ +| Ubuntu | 12.04 | Precise Pangolin | linux-3.2.0 | 1, 2 | B, I, C | ++----------+----------+--------------------+--------------+---------+------------+ +| Ubuntu | 12.10 | Quantal Quetzal | linux-3.5.4 | 2 | B | ++----------+----------+--------------------+--------------+---------+------------+ +| Ubuntu | 13.04 | Raring Ringtail | linux-3.8.5 | | B | ++----------+----------+--------------------+--------------+---------+------------+ +| Debian | 6.0 | Squeeze | linux-2.6.32 | 1, 2, 3 | B | ++----------+----------+--------------------+--------------+---------+------------+ +| Debian | 7.0 | Wheezy | linux-3.2.0 | 1, 2 | B | ++----------+----------+--------------------+--------------+---------+------------+ +| CentOS | 6.3 | N/A | linux-2.6.32 | 1, 2 | B, I | ++----------+----------+--------------------+--------------+---------+------------+ +| RHEL | 6.3 | | linux-2.6.32 | 1, 2 | B, I | ++----------+----------+--------------------+--------------+---------+------------+ +| Fedora | 18.0 | Spherical Cow | linux-3.6.0 | | B | ++----------+----------+--------------------+--------------+---------+------------+ +| Fedora | 19.0 | Schrödinger's Cat | linux-3.10.0 | | B | ++----------+----------+--------------------+--------------+---------+------------+ +| OpenSuse | 12.2 | N/A | linux-3.4.0 | 2 | B | ++----------+----------+--------------------+--------------+---------+------------+ + + + Cuttlefish (0.61) ----------------- @@ -63,6 +93,7 @@ Cuttlefish (0.61) | OpenSuse | 12.2 | N/A | linux-3.4.0 | 2 | B | +----------+----------+--------------------+--------------+---------+------------+ + Bobtail (0.56) -------------- @@ -90,6 +121,7 @@ Bobtail (0.56) | OpenSuse | 12.2 | N/A | linux-3.4.0 | 2 | B | +----------+----------+--------------------+--------------+---------+------------+ + Argonaut (0.48) --------------- @@ -126,6 +158,7 @@ Notes ``ceph-osd`` daemons using ``XFS`` or ``ext4`` on the same host will not perform as well as they could. + Testing ------- diff --git a/doc/start/quick-ceph-deploy.rst b/doc/start/quick-ceph-deploy.rst index 3c0ca1b0653..1fabd1b182f 100644 --- a/doc/start/quick-ceph-deploy.rst +++ b/doc/start/quick-ceph-deploy.rst @@ -3,26 +3,31 @@ ============================= If you haven't completed your `Preflight Checklist`_, do that first. This -**Quick Start** sets up a two-node demo cluster so you can explore some of the -:term:`Ceph Storage Cluster` functionality. This **Quick Start** will help you -install a minimal Ceph Storage Cluster on a server node from your admin node -using ``ceph-deploy``. +**Quick Start** sets up a :term:`Ceph Storage Cluster` using ``ceph-deploy`` +on your admin node. Create a three Ceph Node cluster so you can +explore Ceph functionality. .. ditaa:: - /----------------\ /----------------\ - | Admin Node |<------->| Server Node | - | cCCC | | cCCC | - +----------------+ +----------------+ - | Ceph Commands | | ceph - mon | - \----------------/ +----------------+ - | ceph - osd | - +----------------+ - | ceph - mds | - \----------------/ - - -For best results, create a directory on your admin node for maintaining the -configuration of your cluster. 
:: + /------------------\ /----------------\ + | Admin Node | | ceph–node1 | + | +-------->+ cCCC | + | ceph–deploy | | mon.ceph–node1 | + \---------+--------/ \----------------/ + | + | /----------------\ + | | ceph–node2 | + +----------------->+ cCCC | + | | osd.0 | + | \----------------/ + | + | /----------------\ + | | ceph–node3 | + +----------------->| cCCC | + | osd.1 | + \----------------/ + +For best results, create a directory on your admin node node for maintaining the +configuration that ``ceph-deploy`` generates for your cluster. :: mkdir my-cluster cd my-cluster @@ -31,228 +36,283 @@ configuration of your cluster. :: current directory. Ensure you are in this directory when executing ``ceph-deploy``. +As a first exercise, create a Ceph Storage Cluster with one Ceph Monitor and two +Ceph OSD Daemons. Once the cluster reaches a ``active + clean`` state, expand it +by adding a third Ceph OSD Daemon, a Metadata Server and two more Ceph Monitors. + +.. important:: Do not call ``ceph-deploy`` with ``sudo`` or run it as ``root`` + if you are logged in as a different user, because it will not issue ``sudo`` + commands needed on the remote host. Create a Cluster ================ -To create your Ceph Storage Cluster, declare its initial monitors, generate a -filesystem ID (``fsid``) and generate monitor keys by entering the following -command on a commandline prompt:: +If at any point you run into trouble and you want to start over, execute +the following:: - ceph-deploy new {mon-server-name} - ceph-deploy new mon-ceph-node + ceph-deploy purgedata {ceph-node} [{ceph-node}] + ceph-deploy forgetkeys -Check the output of ``ceph-deploy`` with ``ls`` and ``cat`` in the current -directory. You should see a Ceph configuration file, a keyring, and a log file -for the new cluster. See `ceph-deploy new -h`_ for additional details. -.. topic:: Single Node Quick Start +On your admin node, perform the following steps using ``ceph-deploy``. - Assuming only one node for your Ceph Storage Cluster, you will need to - modify the default ``osd crush chooseleaf type`` setting (it defaults to - ``1`` for ``node``) to ``0`` for ``device`` so that it will peer with OSDs - on the local node. Add the following line to your Ceph configuration file:: - - osd crush chooseleaf type = 0 +#. Create the cluster. :: -.. tip:: If you deploy without executing foregoing step on a single node - cluster, your Ceph Storage Cluster will not achieve an ``active + clean`` - state. To remedy this situation, you must modify your `CRUSH Map`_. + ceph-deploy new {ceph-node} + ceph-deploy new ceph-node1 -Install Ceph -============ + Check the output of ``ceph-deploy`` with ``ls`` and ``cat`` in the current + directory. You should see a Ceph configuration file, a keyring, and a log + file for the new cluster. See `ceph-deploy new -h`_ for additional details. -To install Ceph on your server node, open a command line on your admin -node and type the following:: +#. Install Ceph. :: - ceph-deploy install {server-node-name}[,{server-node-name}] - ceph-deploy install mon-ceph-node + ceph-deploy install {ceph-node}[{ceph-node} ...] + ceph-deploy install ceph-node1 ceph-node2 ceph-node3 -Without additional arguments, ``ceph-deploy`` will install the most recent -stable Ceph package to the server node. See `ceph-deploy install -h`_ for -additional details. -.. tip:: When ``ceph-deploy`` completes installation successfully, - it should echo ``OK``. +#. Add a Ceph Monitor. 
:: + ceph-deploy mon create {ceph-node} + ceph-deploy mon create ceph-node1 + +#. Gather keys. :: -Add a Monitor -============= + ceph-deploy gatherkeys {ceph-node} + ceph-deploy gatherkeys ceph-node1 -To run a Ceph cluster, you need at least one Ceph Monitor. When using -``ceph-deploy``, the tool enforces a single Ceph Monitor per node. Execute the -following to create a Ceph Monitor:: + Once you have gathered keys, your local directory should have the following + keyrings: - ceph-deploy mon create {mon-server-name} - ceph-deploy mon create mon-ceph-node + - ``{cluster-name}.client.admin.keyring`` + - ``{cluster-name}.bootstrap-osd.keyring`` + - ``{cluster-name}.bootstrap-mds.keyring`` + -.. tip:: In production environments, we recommend running Ceph Monitors on - nodes that do not run OSDs. +#. Add two OSDs. For fast setup, this quick start uses a directory rather + than an entire disk per Ceph OSD Daemon. See `ceph-deploy osd`_ for + details on using separate disks/partitions for OSDs and journals. + Login to the Ceph Nodes and create a directory for + the Ceph OSD Daemon. :: + + ssh ceph-node2 + sudo mkdir /tmp/osd0 + exit + + ssh ceph-node3 + sudo mkdir /tmp/osd1 + exit -When you have added a monitor successfully, directories under ``/var/lib/ceph`` -on your server node should have subdirectories ``bootstrap-mds`` and -``bootstrap-osd`` that contain keyrings. If these directories do not contain -keyrings, execute ``ceph-deploy mon create`` again on the admin node. + Then, from your admin node, use ``ceph-deploy`` to prepare the OSDs. :: + ceph-deploy osd prepare {ceph-node}:/path/to/directory + ceph-deploy osd prepare ceph-node2:/tmp/osd0 ceph-node3:/tmp/osd1 -Gather Keys -=========== + Finally, activate the OSDs. :: -To deploy additional daemons and provision them with monitor authentication keys -from your admin node, you must first gather keys from a monitor node. Execute -the following to gather keys:: + ceph-deploy osd activate {ceph-node}:/path/to/directory + ceph-deploy osd activate ceph-node2:/tmp/osd0 ceph-node3:/tmp/osd1 - ceph-deploy gatherkeys {mon-server-name} - ceph-deploy gatherkeys mon-ceph-node +#. Use ``ceph-deploy`` to copy the configuration file and admin key to + your admin node and your Ceph Nodes so that you can use the ``ceph`` + CLI without having to specify the monitor address and + ``ceph.client.admin.keyring`` each time you execute a command. :: + + ceph-deploy admin {ceph-node} + ceph-deploy admin admin-node ceph-node1 ceph-node2 ceph-node3 -Once you have gathered keys, your local directory should have the following keyrings: + **Note:** Since you are using ``ceph-deploy`` to talk to the + local host, your host must be reachable by its hostname + (e.g., you can modify ``/etc/hosts`` if necessary). Ensure that + you have the correct permissions for the ``ceph.client.admin.keyring``. -- ``{cluster-name}.client.admin.keyring`` -- ``{cluster-name}.bootstrap-osd.keyring`` -- ``{cluster-name}.bootstrap-mds.keyring`` +#. Check your cluster's health. :: -If you don't have these keyrings, you may not have created a monitor successfully, -or you may have a problem with your network connection. Ensure that you complete -this step such that you have the foregoing keyrings before proceeding further. + ceph health -.. tip:: You may repeat this procedure. If it fails, check to see if the - ``/var/lib/ceph/boostrap-{osd}|{mds}`` directories on the server node - have keyrings. If they do not have keyrings, try adding the monitor again; - then, return to this step. 
+ Your cluster should return an ``active + clean`` state when it + has finished peering. -Add Ceph OSD Daemons -==================== +Operating Your Cluster +====================== -For a cluster's object placement groups to reach an ``active + clean`` state, -you must have at least two instances of a :term:`Ceph OSD Daemon` running and -at least two copies of an object (``osd pool default size`` is ``2`` -by default). +Deploying a Ceph cluster with ``ceph-deploy`` automatically starts the cluster. +To operate the cluster daemons with Debian/Ubuntu distributions, see +`Running Ceph with Upstart`_. To operate the cluster daemons with CentOS, +Red Hat, Fedora, and SLES distributions, see `Running Ceph with sysvinit`_. -Adding Ceph OSD Daemons is slightly more involved than other ``ceph-deploy`` -commands, because a Ceph OSD Daemon involves both a data store and a journal. -The ``ceph-deploy`` tool has the ability to invoke ``ceph-disk-prepare`` to -prepare the disk and activate the Ceph OSD Daemon for you. +To learn more about peering and cluster health, see `Monitoring a Cluster`_. +To learn more about Ceph OSD Daemon and placement group health, see +`Monitoring OSDs and PGs`_. + +Once you deploy a Ceph cluster, you can try out some of the administration +functionality, the ``rados`` object store command line, and then proceed to +Quick Start guides for Ceph Block Device, Ceph Filesystem, and the Ceph Object +Gateway. -Multiple OSDs on the OS Disk (Demo Only) ----------------------------------------- -For demonstration purposes, you may wish to add multiple OSDs to the OS disk -(not recommended for production systems). To use Ceph OSDs daemons on the OS -disk, you must use ``prepare`` and ``activate`` as separate steps. First, -define a directory for the Ceph OSD daemon(s). :: - - mkdir /tmp/osd0 - mkdir /tmp/osd1 - -Then, use ``prepare`` to prepare the directory(ies) for use with a -Ceph OSD Daemon. :: - - ceph-deploy osd prepare {osd-node-name}:/tmp/osd0 - ceph-deploy osd prepare {osd-node-name}:/tmp/osd1 +Expanding Your Cluster +====================== -Finally, use ``activate`` to activate the Ceph OSD Daemons. :: +Once you have a basic cluster up and running, the next step is to expand +cluster. Add a Ceph OSD Daemon and a Ceph Metadata Server to ``ceph-node1``. +Then add a Ceph Monitor to ``ceph-node2`` and ``ceph-node3`` to establish a +quorum of Ceph Monitors. - ceph-deploy osd activate {osd-node-name}:/tmp/osd0 - ceph-deploy osd activate {osd-node-name}:/tmp/osd1 +.. ditaa:: + /------------------\ /----------------\ + | ceph–deploy | | ceph–node1 | + | Admin Node | | cCCC | + | +-------->+ mon.ceph–node1 | + | | | osd.2 | + | | | mds.ceph–node1 | + \---------+--------/ \----------------/ + | + | /----------------\ + | | ceph–node2 | + | | cCCC | + +----------------->+ | + | | osd.0 | + | | mon.ceph–node2 | + | \----------------/ + | + | /----------------\ + | | ceph–node3 | + | | cCCC | + +----------------->+ | + | osd.1 | + | mon.ceph–node3 | + \----------------/ -.. tip:: You need two OSDs to reach an ``active + clean`` state. You can - add one OSD at a time, but OSDs need to communicate with each other - for Ceph to run properly. Always use more than one OSD per cluster. +Adding an OSD +------------- +Since you are running a 3-node cluster for demonstration purposes, add the OSD +to the monitor node. 
:: -List Disks ----------- + ssh ceph-node1 + sudo mkdir /tmp/osd2 + exit -To list the available disk drives on a prospective :term:`Ceph Node`, execute -the following:: +Then, from your ``ceph-deploy`` node, prepare the OSD. :: - ceph-deploy disk list {osd-node-name} - ceph-deploy disk list ceph-node + ceph-deploy osd prepare {ceph-node}:/path/to/directory + ceph-deploy osd prepare ceph-node1:/tmp/osd2 +Finally, activate the OSDs. :: -Zap a Disk ----------- + ceph-deploy osd activate {ceph-node}:/path/to/directory + ceph-deploy osd activate ceph-node1:/tmp/osd2 -To zap a disk (delete its partition table) in preparation for use with Ceph, -execute the following:: - ceph-deploy disk zap {osd-node-name}:{disk} - ceph-deploy disk zap ceph-node:sdb ceph-node:sdb2 +Once you have added your new OSD, Ceph will begin rebalancing the cluster by +migrating placement groups to your new OSD. You can observe this process with +the ``ceph`` CLI. :: -.. important:: This will delete all data on the disk. + ceph -w +You should see the placement group states change from ``active+clean`` to active +with some degraded objects, and finally ``active+clean`` when migration +completes. (Control-c to exit.) -Add OSDs on Standalone Disks ----------------------------- -You can add OSDs using ``prepare`` and ``activate`` in two discrete -steps. To prepare a disk for use with a Ceph OSD Daemon, execute the -following:: +Add a Metadata Server +--------------------- - ceph-deploy osd prepare {osd-node-name}:{osd-disk-name}[:/path/to/journal] - ceph-deploy osd prepare ceph-node:sdb +To use CephFS, you need at least one metadata server. Execute the following to +create a metadata server:: -To activate the Ceph OSD Daemon, execute the following:: + ceph-deploy mds create {ceph-node} + ceph-deploy mds create ceph-node1 - ceph-deploy osd activate {osd-node-name}:{osd-partition-name} - ceph-deploy osd activate ceph-node:sdb1 -To prepare an OSD disk and activate it in one step, execute the following:: +.. note:: Currently Ceph runs in production with one metadata server only. You + may use more, but there is currently no commercial support for a cluster + with multiple metadata servers. - ceph-deploy osd create {osd-node-name}:{osd-disk-name}[:/path/to/journal] [{osd-node-name}:{osd-disk-name}[:/path/to/journal]] - ceph-deploy osd create ceph-node:sdb:/dev/ssd1 ceph-node:sdc:/dev/ssd2 +Adding Monitors +--------------- -.. note:: The journal example assumes you will use a partition on a separate - solid state drive (SSD). If you omit a journal drive or partition, - ``ceph-deploy`` will use create a separate partition for the journal - on the same drive. If you have already formatted your disks and created - partitions, you may also use partition syntax for your OSD disk. +A Ceph Storage Cluster requires at least one Ceph Monitor to run. For high +availability, Ceph Storage Clusters typically run multiple Ceph +Monitors so that the failure of a single Ceph Monitor will not bring down the +Ceph Storage Cluster. Ceph uses the Paxos algorithm, which requires a majority +of monitors (i.e., 1, 2:3, 3:4, 3:5, 4:6, etc.) to form a quorum. -You must add a minimum of two Ceph OSD Daemons for the placement groups in -a cluster to achieve an ``active + clean`` state. +Add two Ceph Monitors to your cluster. :: + ceph-deploy mon create {ceph-node} + ceph-deploy mon create ceph-node2 ceph-node3 -Add a MDS -========= +Once you have added your new Ceph Monitors, Ceph will begin synchronizing +the monitors and form a quorum. 
You can check the quorum status by executing +the following:: -To use CephFS, you need at least one metadata node. Execute the following to -create a metadata node:: + ceph quorum_status - ceph-deploy mds create {node-name} - ceph-deploy mds create ceph-node -.. note:: Currently Ceph runs in production with one metadata node only. You - may use more, but there is currently no commercial support for a cluster - with multiple metadata nodes. +Storing/Retrieving Object Data +============================== +To store object data in the Ceph Storage Cluster, a Ceph client must: -Summary -======= +#. Set an object name +#. Specify a `pool`_ -Deploying a Ceph cluster with ``ceph-deploy`` automatically starts the cluster. -To operate the cluster daemons, see `Running Ceph with Upstart`_. +The Ceph Client retrieves the latest cluster map and the CRUSH algorithm +calculates how to map the object to a `placement group`_, and then calculates +how to assign the placement group to a Ceph OSD Daemon dynamically. To find the +object location, all you need is the object name and the pool name. For +example:: -Once you deploy a Ceph cluster, you can try out some of the administration -functionality, the object store command line, and then proceed to Quick Start -guides for RBD, CephFS, and the Ceph Gateway. + ceph osd map {poolname} {object-name} -.. topic:: Other ceph-deploy Commands +.. topic:: Exercise: Locate an Object - To view other ``ceph-deploy`` commands, execute: - - ``ceph-deploy -h`` - + As an exercise, lets create an object. Specify an object name, a path to + a test file containing some object data and a pool name using the + ``rados put`` command on the command line. For example:: + + rados put {object-name} {file-path} --pool=data + rados put test-object-1 testfile.txt --pool=data + + To verify that the Ceph Storage Cluster stored the object, execute + the following:: + + rados -p data ls + + Now, identify the object location:: -See `Ceph Deploy`_ for additional details. + ceph osd map {pool-name} {object-name} + ceph osd map data test-object-1 + + Ceph should output the object's location. For example:: + + osdmap e537 pool 'data' (0) object 'test-object-1' -> pg 0.d1743484 (0.4) -> up [1,0] acting [1,0] + + To remove the test object, simply delete it using the ``rados rm`` + command. For example:: + + rados rm test-object-1 --pool=data + +As the cluster evolves, the object location may change dynamically. One benefit +of Ceph's dynamic rebalancing is that Ceph relieves you from having to perform +the migration manually. .. _Preflight Checklist: ../quick-start-preflight .. _Ceph Deploy: ../../rados/deployment .. _ceph-deploy install -h: ../../rados/deployment/ceph-deploy-install .. _ceph-deploy new -h: ../../rados/deployment/ceph-deploy-new +.. _ceph-deploy osd: ../../rados/deployment/ceph-deploy-osd .. _Running Ceph with Upstart: ../../rados/operations/operating#running-ceph-with-upstart -.. _CRUSH Map: ../../rados/operations/crush-map
\ No newline at end of file +.. _Running Ceph with sysvinit: ../../rados/operations/operating#running-ceph-with-sysvinit +.. _CRUSH Map: ../../rados/operations/crush-map +.. _pool: ../../rados/operations/pools +.. _placement group: ../../rados/operations/placement-groups +.. _Monitoring a Cluster: ../../rados/operations/monitoring +.. _Monitoring OSDs and PGs: ../../rados/operations/monitoring-osd-pg
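The ``rados put`` / ``ceph osd map`` exercise added above can also be driven programmatically through librados. The following is a minimal C++ sketch and is not taken from this changeset; it assumes a readable ``/etc/ceph/ceph.conf``, the ``client.admin`` keyring, and the same default ``data`` pool and ``test-object-1`` object used in the exercise::

   // Minimal librados round trip (sketch only, under the assumptions above).
   // Compile with something like: g++ this_file.cc -lrados
   #include <rados/librados.hpp>
   #include <iostream>
   #include <string>

   int main() {
     librados::Rados cluster;
     cluster.init(NULL);                 // connect as client.admin
     cluster.conf_read_file(NULL);       // read /etc/ceph/ceph.conf
     if (cluster.connect() < 0) {
       std::cerr << "could not connect to the cluster" << std::endl;
       return 1;
     }

     librados::IoCtx io;
     cluster.ioctx_create("data", io);   // same pool as the exercise

     librados::bufferlist in, out;
     in.append("hello from librados");
     io.write_full("test-object-1", in);             // store the object
     io.read("test-object-1", out, in.length(), 0);  // read it back
     std::cout << std::string(out.c_str(), out.length()) << std::endl;

     io.remove("test-object-1");                     // clean up, like rados rm
     cluster.shutdown();
     return 0;
   }

As with the ``rados`` CLI exercise, the client never needs to know which OSD holds the object; CRUSH resolves the placement from the cluster map.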
\ No newline at end of file diff --git a/doc/start/quick-cephfs.rst b/doc/start/quick-cephfs.rst index 18dadb005ec..5449e5a6fe3 100644 --- a/doc/start/quick-cephfs.rst +++ b/doc/start/quick-cephfs.rst @@ -3,7 +3,7 @@ ===================== To use the :term:`Ceph FS` Quick Start guide, you must have executed the -procedures in the `Ceph Deploy Quick Start`_ guide first. Execute this quick +procedures in the `Storage Cluster Quick Start`_ guide first. Execute this quick start on the Admin Host. Prerequisites @@ -91,7 +91,7 @@ See `Ceph FS`_ for additional information. Ceph FS is not quite as stable as the Ceph Block Device and Ceph Object Storage. See `Troubleshooting`_ if you encounter trouble. -.. _Ceph Deploy Quick Start: ../quick-ceph-deploy +.. _Storage Cluster Quick Start: ../quick-ceph-deploy .. _Ceph FS: ../../cephfs/ .. _FAQ: http://wiki.ceph.com/03FAQs/01General_FAQ#How_Can_I_Give_Ceph_a_Try.3F .. _Troubleshooting: ../../cephfs/troubleshooting
\ No newline at end of file diff --git a/doc/start/quick-rbd.rst b/doc/start/quick-rbd.rst index a466771502d..9424457f8c2 100644 --- a/doc/start/quick-rbd.rst +++ b/doc/start/quick-rbd.rst @@ -2,47 +2,73 @@ Block Device Quick Start ========================== -To use this guide, you must have executed the procedures in the `Object Store -Quick Start`_ guide first. Ensure your :term:`Ceph Storage Cluster` is in an -``active + clean`` state before working with the :term:`Ceph Block Device`. -Execute this quick start on the admin node. +To use this guide, you must have executed the procedures in the `Storage +Cluster Quick Start`_ guide first. Ensure your :term:`Ceph Storage Cluster` is +in an ``active + clean`` state before working with the :term:`Ceph Block +Device`. .. note:: The Ceph Block Device is also known as :term:`RBD` or :term:`RADOS` Block Device. -#. Install ``ceph-common``. :: - sudo apt-get install ceph-common +.. ditaa:: + /------------------\ /----------------\ + | Admin Node | | ceph–client | + | +-------->+ cCCC | + | ceph–deploy | | ceph | + \------------------/ \----------------/ -#. Create a block device image. :: - rbd create foo --size 4096 [-m {mon-IP}] [-k /path/to/ceph.client.admin.keyring] +You may use a virtual machine for your ``ceph-client`` node, but do not +execute the following procedures on the same physical node as your Ceph +Storage Cluster nodes (unless you use a VM). See `FAQ`_ for details. -#. Load the ``rbd`` client module. :: + +Install Ceph +============ + +#. On the admin node, use ``ceph-deploy`` to install Ceph on your + ``ceph-client`` node. :: + + ceph-deploy install ceph-client + +#. On the admin node, use ``ceph-deploy`` to copy the Ceph configuration file + and the ``ceph.client.admin.keyring`` to the ``ceph-client``. :: + + ceph-deploy admin ceph-client + + +Configure a Block Device +======================== + +#. On the ``ceph-client`` node, create a block device image. :: + + rbd create foo --size 4096 [-m {mon-IP}] [-k /path/to/ceph.client.admin.keyring] + +#. On the ``ceph-client`` node, load the ``rbd`` client module. :: sudo modprobe rbd -#. Map the image to a block device. :: +#. On the ``ceph-client`` node, map the image to a block device. :: sudo rbd map foo --pool rbd --name client.admin [-m {mon-IP}] [-k /path/to/ceph.client.admin.keyring] -#. Use the block device. In the following example, create a file system. :: +#. Use the block device by creating a file system on the ``ceph-client`` + node. :: sudo mkfs.ext4 -m0 /dev/rbd/rbd/foo This may take a few moments. -#. Mount the file system. :: +#. Mount the file system on the ``ceph-client`` node. :: sudo mkdir /mnt/ceph-block-device sudo mount /dev/rbd/rbd/foo /mnt/ceph-block-device cd /mnt/ceph-block-device -.. note:: Mount the block device on the client machine, - not the server machine. See `FAQ`_ for details. See `block devices`_ for additional details. -.. _Object Store Quick Start: ../quick-ceph-deploy +.. _Storage Cluster Quick Start: ../quick-ceph-deploy .. _block devices: ../../rbd/rbd .. 
_FAQ: http://wiki.ceph.com/03FAQs/01General_FAQ#How_Can_I_Give_Ceph_a_Try.3F diff --git a/doc/start/quick-rgw.rst b/doc/start/quick-rgw.rst index af48a3154c1..40cf7d4f4dc 100644 --- a/doc/start/quick-rgw.rst +++ b/doc/start/quick-rgw.rst @@ -2,7 +2,7 @@ Object Storage Quick Start ============================ -To use this guide, you must have executed the procedures in the `Ceph Deploy +To use this guide, you must have executed the procedures in the `Storage Cluster Quick Start`_ guide first. Ensure your :term:`Ceph Storage Cluster` is in an ``active + clean`` state before working with the :term:`Ceph Object Storage`. @@ -344,7 +344,7 @@ tutorials. See the `S3-compatible`_ and `Swift-compatible`_ APIs for details. .. _Create rgw.conf: ../../radosgw/config/index.html#create-rgw-conf -.. _Ceph Deploy Quick Start: ../quick-ceph-deploy +.. _Storage Cluster Quick Start: ../quick-ceph-deploy .. _Ceph Object Storage Manual Install: ../../radosgw/manual-install .. _RGW Configuration: ../../radosgw/config .. _S3-compatible: ../../radosgw/s3 diff --git a/doc/start/quick-start-preflight.rst b/doc/start/quick-start-preflight.rst index 74dc403c211..77a54795f19 100644 --- a/doc/start/quick-start-preflight.rst +++ b/doc/start/quick-start-preflight.rst @@ -4,74 +4,57 @@ .. versionadded:: 0.60 -Thank you for trying Ceph! Petabyte-scale data clusters are quite an -undertaking. Before delving deeper into Ceph, we recommend setting up a two-node -demo cluster to explore some of the functionality. This **Preflight Checklist** -will help you prepare an admin node and a server node for use with -``ceph-deploy``. - -.. ditaa:: - /----------------\ /----------------\ - | Admin Node |<------->| Server Node | - | cCCC | | cCCC | - \----------------/ \----------------/ - - -Before you can deploy Ceph using ``ceph-deploy``, you need to ensure that you -have a few things set up first on your admin node and on nodes running Ceph -daemons. - - -Install an Operating System -=========================== - -Install a recent release of Debian or Ubuntu (e.g., 12.04, 12.10, 13.04) on your -nodes. For additional details on operating systems or to use other operating -systems other than Debian or Ubuntu, see `OS Recommendations`_. - - -Install an SSH Server -===================== - -The ``ceph-deploy`` utility requires ``ssh``, so your server node(s) require an -SSH server. :: - - sudo apt-get install openssh-server - - -Create a User -============= - -Create a user on nodes running Ceph daemons. - -.. tip:: We recommend a username that brute force attackers won't - guess easily (e.g., something other than ``root``, ``ceph``, etc). - -:: +Thank you for trying Ceph! We recommend setting up a ``ceph-deploy`` admin node +and a 3-node :term:`Ceph Storage Cluster` to explore the basics of Ceph. This +**Preflight Checklist** will help you prepare a ``ceph-deploy`` admin node and +three Ceph Nodes (or virtual machines) that will host your Ceph Storage Cluster. + + +.. ditaa:: + /------------------\ /----------------\ + | Admin Node | | ceph–node1 | + | +-------->+ | + | ceph–deploy | | cCCC | + \---------+--------/ \----------------/ + | + | /----------------\ + | | ceph–node2 | + +----------------->+ | + | | cCCC | + | \----------------/ + | + | /----------------\ + | | ceph–node3 | + +----------------->| | + | cCCC | + \----------------/ + + +Ceph Node Setup +=============== + +Perform the following steps: + +#. Create a user on each Ceph Node. 
:: ssh user@ceph-server sudo useradd -d /home/ceph -m ceph sudo passwd ceph - -``ceph-deploy`` installs packages onto your nodes. This means that -the user you create requires passwordless ``sudo`` privileges. - -.. note:: We **DO NOT** recommend enabling the ``root`` password - for security reasons. - -To provide full privileges to the user, add the following to -``/etc/sudoers.d/ceph``. :: +#. Add ``root`` privileges for the user on each Ceph Node. :: echo "ceph ALL = (root) NOPASSWD:ALL" | sudo tee /etc/sudoers.d/ceph sudo chmod 0440 /etc/sudoers.d/ceph -Configure SSH -============= +#. Install an SSH server (if necessary):: -Configure your admin machine with password-less SSH access to each node -running Ceph daemons (leave the passphrase empty). :: + sudo apt-get install openssh-server + sudo yum install openssh-server + + +#. Configure your ``ceph-deploy`` admin node with password-less SSH access to + each Ceph Node. Leave the passphrase empty:: ssh-keygen Generating public/private key pair. @@ -81,77 +64,95 @@ running Ceph daemons (leave the passphrase empty). :: Your identification has been saved in /ceph-client/.ssh/id_rsa. Your public key has been saved in /ceph-client/.ssh/id_rsa.pub. -Copy the key to each node running Ceph daemons:: +#. Copy the key to each Ceph Node. :: ssh-copy-id ceph@ceph-server -Modify your ~/.ssh/config file of your admin node so that it defaults -to logging in as the user you created when no username is specified. :: + +#. Modify the ``~/.ssh/config`` file of your ``ceph-deploy`` admin node so that + it logs in to Ceph Nodes as the user you created (e.g., ``ceph``). :: Host ceph-server - Hostname ceph-server.fqdn-or-ip-address.com - User ceph + Hostname ceph-server.fqdn-or-ip-address.com + User ceph + + +#. Ensure connectivity using ``ping`` with hostnames (i.e., not IP addresses). + Address hostname resolution issues and firewall issues as necessary. -.. note:: Do not call ceph-deploy with ``sudo`` or run as ``root`` if you are - login in as a different user (as in the ssh config above) because it - will not issue ``sudo`` commands needed on the remote host. -Install ceph-deploy -=================== +Ceph Deploy Setup +================= -To install ``ceph-deploy``, execute the following:: +Add Ceph repositories to the ``ceph-deploy`` admin node. Then, install +``ceph-deploy``. + +.. important:: Do not call ``ceph-deploy`` with ``sudo`` or run it as ``root`` + if you are logged in as a different user, because it will not issue ``sudo`` + commands needed on the remote host. + + +Advanced Package Tool (APT) +--------------------------- + +For Debian and Ubuntu distributions, perform the following steps: + +#. Add the release key:: wget -q -O- 'https://ceph.com/git/?p=ceph.git;a=blob_plain;f=keys/release.asc' | sudo apt-key add - echo deb http://ceph.com/debian-dumpling/ $(lsb_release -sc) main | sudo tee /etc/apt/sources.list.d/ceph.list sudo apt-get update sudo apt-get install ceph-deploy +#. Add the Ceph packages to your repository. Replace ``{ceph-stable-release}`` + with a stable Ceph release (e.g., ``cuttlefish``, ``dumpling``, etc.). + For example:: + + echo deb http://ceph.com/debian-{ceph-stable-release}/ $(lsb_release -sc) main | sudo tee /etc/apt/sources.list.d/ceph.list -Ensure Connectivity -=================== +#. 
Update your repository and install ``ceph-deploy``:: -Ensure that your admin node has connectivity to the network and to your Server -node (e.g., ensure ``iptables``, ``ufw`` or other tools that may prevent -connections, traffic forwarding, etc. to allow what you need). + sudo apt-get update && sudo apt-get install ceph-deploy -.. tip:: The ``ceph-deploy`` tool is new and you may encounter some issues - without effective error messages. -Once you have completed this pre-flight checklist, you are ready to begin using -``ceph-deploy``. +Red Hat Package Manager (RPM) +----------------------------- +For Red Hat(rhel6), CentOS (el6), Fedora 17-19 (f17-f19), OpenSUSE 12 +(opensuse12), and SLES (sles11) perform the following steps: -Hostname Resolution -=================== +#. Add the package to your repository. Open a text editor and create a + Yellowdog Updater, Modified (YUM) entry. Use the file path + ``/etc/yum.repos.d/ceph.repo``. For example:: -Ensure that your admin node can resolve the server node's hostname. :: + sudo vim /etc/yum.repos.d/ceph.repo - ping {server-node} + Paste the following example code. Replace ``{ceph-stable-release}`` with + the recent stable release of Ceph (e.g., ``dumpling``). Replace ``{distro}`` + with your Linux distribution (e.g., ``el6`` for CentOS 6, ``rhel6`` for + Red Hat 6, ``fc18`` or ``fc19`` for Fedora 18 or Fedora 19, and ``sles11`` + for SLES 11). Finally, save the contents to the + ``/etc/yum.repos.d/ceph.repo`` file. :: -If you execute ``ceph-deploy`` against the localhost, ``ceph-deploy`` -must be able to resolve its IP address. Consider adding the IP address -to your ``/etc/hosts`` file such that it resolves to the hostname. :: + [ceph-noarch] + name=Ceph noarch packages + baseurl=http://ceph.com/rpm-{ceph-stable-release}/{distro}/noarch + enabled=1 + gpgcheck=1 + type=rpm-md + gpgkey=https://ceph.com/git/?p=ceph.git;a=blob_plain;f=keys/release.asc - hostname - host -4 {hostname} - sudo vim /etc/hosts - {ip-address} {hostname} +#. Update your repository and install ``ceph-deploy``:: - ceph-deploy {command} {hostname} + sudo yum update && sudo yum install ceph-deploy -.. tip:: The ``ceph-deploy`` tool will not resolve to ``localhost``. Use - the hostname. Summary ======= -Once you have passwordless ``ssh`` connectivity, passwordless ``sudo``, -installed ``ceph-deploy``, and you have ensured appropriate connectivity, -proceed to the `Storage Cluster Quick Start`_. - -.. tip:: The ``ceph-deploy`` utility can install Ceph packages on remote - machines from the admin node! +This completes the Quick Start Preflight. Proceed to the `Storage Cluster +Quick Start`_. .. _Storage Cluster Quick Start: ../quick-ceph-deploy .. 
_OS Recommendations: ../../install/os-recommendations diff --git a/qa/workunits/cephtool/test.sh b/qa/workunits/cephtool/test.sh index 09e55b9a842..f0fa37893b1 100755 --- a/qa/workunits/cephtool/test.sh +++ b/qa/workunits/cephtool/test.sh @@ -147,7 +147,9 @@ ceph mds newfs 0 1 --yes-i-really-mean-it ceph osd pool create data2 10 poolnum=$(ceph osd dump | grep 'pool.*data2' | awk '{print $2;}') ceph mds add_data_pool $poolnum +ceph mds add_data_pool rbd ceph mds remove_data_pool $poolnum +ceph mds remove_data_pool rbd ceph osd pool delete data2 data2 --yes-i-really-really-mean-it ceph mds set_max_mds 4 ceph mds set_max_mds 3 @@ -325,6 +327,9 @@ ceph osd pool set data size 3 ceph osd pool get data size | grep 'size: 3' ceph osd pool set data size 2 +ceph osd pool set data hashpspool true +ceph osd pool set data hashpspool false + ceph osd pool get rbd crush_ruleset | grep 'crush_ruleset: 2' ceph osd thrash 10 diff --git a/qa/workunits/misc/mkpool_layout_vxattrs.sh b/qa/workunits/misc/mkpool_layout_vxattrs.sh index 16b3cdfe517..91d31664898 100755 --- a/qa/workunits/misc/mkpool_layout_vxattrs.sh +++ b/qa/workunits/misc/mkpool_layout_vxattrs.sh @@ -4,10 +4,12 @@ set -e touch foo.$$ rados mkpool foo.$$ -poolid=$(ceph osd dump | grep "^pool" | awk '{print $2}' | tail -n 1) -ceph mds add_data_pool ${poolid} +ceph mds add_data_pool foo.$$ setfattr -n ceph.file.layout.pool -v foo.$$ foo.$$ # cleanup -rados rmpool foo.$$ foo.$$ --yes-i-really-really-mean-it rm foo.$$ +ceph mds remove_data_pool foo.$$ +rados rmpool foo.$$ foo.$$ --yes-i-really-really-mean-it + +echo OK diff --git a/qa/workunits/suites/fsstress.sh b/qa/workunits/suites/fsstress.sh index 7f945172687..394e5fad991 100755 --- a/qa/workunits/suites/fsstress.sh +++ b/qa/workunits/suites/fsstress.sh @@ -2,6 +2,7 @@ if [ ! 
-f /usr/lib/ltp/testcases/bin/fsstress ] then + path=`pwd` mkdir -p /tmp/fsstress cd /tmp/fsstress wget -q -O /tmp/fsstress/ltp-full.tgz http://ceph.com/qa/ltp-full-20091231.tgz @@ -13,6 +14,7 @@ then sudo cp -avf /tmp/fsstress/ltp-full-20091231/testcases/kernel/fs/fsstress/fsstress /usr/lib/ltp/testcases/bin/fsstress sudo chmod 755 /usr/lib/ltp/testcases/bin/fsstress rm -Rf /tmp/fsstress + cd $path fi command="/usr/lib/ltp/testcases/bin/fsstress -d fsstress-`hostname`$$ -l 1 -n 1000 -p 10 -v" diff --git a/src/cls/rgw/cls_rgw_client.cc b/src/cls/rgw/cls_rgw_client.cc index 165ca437987..2851f2bd702 100644 --- a/src/cls/rgw/cls_rgw_client.cc +++ b/src/cls/rgw/cls_rgw_client.cc @@ -2,6 +2,7 @@ #include "include/types.h" #include "cls/rgw/cls_rgw_ops.h" +#include "cls/rgw/cls_rgw_client.h" #include "include/rados/librados.hpp" #include "common/debug.h" @@ -157,6 +158,44 @@ int cls_rgw_get_dir_header(IoCtx& io_ctx, string& oid, rgw_bucket_dir_header *he return r; } +class GetDirHeaderCompletion : public ObjectOperationCompletion { + RGWGetDirHeader_CB *ret_ctx; +public: + GetDirHeaderCompletion(RGWGetDirHeader_CB *_ctx) : ret_ctx(_ctx) {} + ~GetDirHeaderCompletion() { + ret_ctx->put(); + } + void handle_completion(int r, bufferlist& outbl) { + struct rgw_cls_list_ret ret; + try { + bufferlist::iterator iter = outbl.begin(); + ::decode(ret, iter); + } catch (buffer::error& err) { + r = -EIO; + } + + ret_ctx->handle_response(r, ret.dir.header); + }; +}; + +int cls_rgw_get_dir_header_async(IoCtx& io_ctx, string& oid, RGWGetDirHeader_CB *ctx) +{ + bufferlist in, out; + struct rgw_cls_list_op call; + call.num_entries = 0; + ::encode(call, in); + ObjectReadOperation op; + GetDirHeaderCompletion *cb = new GetDirHeaderCompletion(ctx); + op.exec("rgw", "bucket_list", in, cb); + AioCompletion *c = librados::Rados::aio_create_completion(NULL, NULL, NULL); + int r = io_ctx.aio_operate(oid, c, &op, NULL); + c->release(); + if (r < 0) + return r; + + return 0; +} + int cls_rgw_bi_log_list(IoCtx& io_ctx, string& oid, string& marker, uint32_t max, list<rgw_bi_log_entry>& entries, bool *truncated) { diff --git a/src/cls/rgw/cls_rgw_client.h b/src/cls/rgw/cls_rgw_client.h index 2ea5d9ca771..39bb3c9fc4a 100644 --- a/src/cls/rgw/cls_rgw_client.h +++ b/src/cls/rgw/cls_rgw_client.h @@ -4,6 +4,13 @@ #include "include/types.h" #include "include/rados/librados.hpp" #include "cls_rgw_types.h" +#include "common/RefCountedObj.h" + +class RGWGetDirHeader_CB : public RefCountedObject { +public: + virtual ~RGWGetDirHeader_CB() {} + virtual void handle_response(int r, rgw_bucket_dir_header& header) = 0; +}; /* bucket index */ void cls_rgw_bucket_init(librados::ObjectWriteOperation& o); @@ -27,6 +34,7 @@ int cls_rgw_bucket_check_index_op(librados::IoCtx& io_ctx, string& oid, int cls_rgw_bucket_rebuild_index_op(librados::IoCtx& io_ctx, string& oid); int cls_rgw_get_dir_header(librados::IoCtx& io_ctx, string& oid, rgw_bucket_dir_header *header); +int cls_rgw_get_dir_header_async(librados::IoCtx& io_ctx, string& oid, RGWGetDirHeader_CB *ctx); void cls_rgw_encode_suggestion(char op, rgw_bucket_dir_entry& dirent, bufferlist& updates); diff --git a/src/common/Formatter.h b/src/common/Formatter.h index 27089ce04f2..ac68b7f461d 100644 --- a/src/common/Formatter.h +++ b/src/common/Formatter.h @@ -44,6 +44,9 @@ class Formatter { virtual void dump_int(const char *name, int64_t s) = 0; virtual void dump_float(const char *name, double d) = 0; virtual void dump_string(const char *name, std::string s) = 0; + virtual void dump_bool(const char 
*name, bool b) { + dump_format_unquoted(name, "%s", (b ? "true" : "false")); + } virtual std::ostream& dump_stream(const char *name) = 0; virtual void dump_format(const char *name, const char *fmt, ...) = 0; virtual void dump_format_unquoted(const char *name, const char *fmt, ...) = 0; diff --git a/src/common/common_init.cc b/src/common/common_init.cc index ef8cf010072..8fb688cd8d3 100644 --- a/src/common/common_init.cc +++ b/src/common/common_init.cc @@ -73,8 +73,11 @@ CephContext *common_preinit(const CephInitParameters &iparams, break; } - if ((flags & CINIT_FLAG_UNPRIVILEGED_DAEMON_DEFAULTS) || - code_env != CODE_ENVIRONMENT_DAEMON) { + if (flags & CINIT_FLAG_UNPRIVILEGED_DAEMON_DEFAULTS) { + // do nothing special! we used to do no default log, pid_file, + // admin_socket, but changed our minds. let's make ceph-fuse + // and radosgw use the same defaults as ceph-{osd,mon,mds,...} + } else if (code_env != CODE_ENVIRONMENT_DAEMON) { // no default log, pid_file, admin_socket conf->set_val_or_die("pid_file", ""); conf->set_val_or_die("admin_socket", ""); diff --git a/src/common/config_opts.h b/src/common/config_opts.h index 2d3f981379b..b419dec88b5 100644 --- a/src/common/config_opts.h +++ b/src/common/config_opts.h @@ -203,7 +203,7 @@ OPTION(mon_leveldb_bloom_size, OPT_INT, 0) // monitor's leveldb bloom bits per e OPTION(mon_leveldb_max_open_files, OPT_INT, 0) // monitor's leveldb max open files OPTION(mon_leveldb_compression, OPT_BOOL, false) // monitor's leveldb uses compression OPTION(mon_leveldb_paranoid, OPT_BOOL, false) // monitor's leveldb paranoid flag -OPTION(mon_leveldb_log, OPT_STR, "") +OPTION(mon_leveldb_log, OPT_STR, "/dev/null") OPTION(mon_leveldb_size_warn, OPT_U64, 40*1024*1024*1024) // issue a warning when the monitor's leveldb goes over 40GB (in bytes) OPTION(paxos_stash_full_interval, OPT_INT, 25) // how often (in commits) to stash a full copy of the PaxosService state OPTION(paxos_max_join_drift, OPT_INT, 10) // max paxos iterations before we must first sync the monitor stores @@ -492,7 +492,7 @@ OPTION(osd_leveldb_bloom_size, OPT_INT, 0) // OSD's leveldb bloom bits per entry OPTION(osd_leveldb_max_open_files, OPT_INT, 0) // OSD's leveldb max open files OPTION(osd_leveldb_compression, OPT_BOOL, true) // OSD's leveldb uses compression OPTION(osd_leveldb_paranoid, OPT_BOOL, false) // OSD's leveldb paranoid flag -OPTION(osd_leveldb_log, OPT_STR, "") // enable OSD leveldb log file +OPTION(osd_leveldb_log, OPT_STR, "/dev/null") // enable OSD leveldb log file // determines whether PGLog::check() compares written out log to stored log OPTION(osd_debug_pg_log_writeout, OPT_BOOL, false) @@ -721,6 +721,10 @@ OPTION(rgw_data_log_num_shards, OPT_INT, 128) // number of objects to keep data OPTION(rgw_data_log_obj_prefix, OPT_STR, "data_log") // OPTION(rgw_replica_log_obj_prefix, OPT_STR, "replica_log") // +OPTION(rgw_bucket_quota_ttl, OPT_INT, 600) // time for cached bucket stats to be cached within rgw instance +OPTION(rgw_bucket_quota_soft_threshold, OPT_DOUBLE, 0.95) // threshold from which we don't rely on cached info for quota decisions +OPTION(rgw_bucket_quota_cache_size, OPT_INT, 10000) // number of entries in bucket quota cache + OPTION(mutex_perf_counter, OPT_BOOL, false) // enable/disable mutex perf counter // This will be set to true when it is safe to start threads. 
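For context on the ``dump_bool()`` default added to ``common/Formatter.h`` above: it renders booleans as unquoted ``true``/``false`` through ``dump_format_unquoted()``, which the new ``RGWQuotaInfo::dump()`` later in this diff relies on (``f->dump_bool("enabled", enabled)``). The following is a minimal usage sketch, not part of the patch, and assumes the Ceph headers are on the include path::

   #include "common/Formatter.h"   // provides ceph::JSONFormatter
   #include <iostream>

   int main() {
     ceph::JSONFormatter f(true);            // pretty-printed JSON
     f.open_object_section("bucket_quota");
     f.dump_bool("enabled", true);           // new helper: unquoted true/false
     f.dump_int("max_size_kb", 1024);
     f.dump_int("max_objects", 100);
     f.close_section();
     f.flush(std::cout);                     // emits {"enabled": true, ...}
     return 0;
   }

Because the default lives in the ``Formatter`` base class, both ``JSONFormatter`` and ``XMLFormatter`` pick it up without per-subclass changes.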
diff --git a/src/common/lru_map.h b/src/common/lru_map.h index 6e7f7b3786f..1e1acc95f76 100644 --- a/src/common/lru_map.h +++ b/src/common/lru_map.h @@ -21,41 +21,76 @@ class lru_map { size_t max; public: + class UpdateContext { + public: + virtual ~UpdateContext() {} + + /* update should return true if object is updated */ + virtual bool update(V *v) = 0; + }; + + bool _find(const K& key, V *value, UpdateContext *ctx); + void _add(const K& key, V& value); + +public: lru_map(int _max) : lock("lru_map"), max(_max) {} virtual ~lru_map() {} bool find(const K& key, V& value); + + /* + * find_and_update() + * + * - will return true if object is found + * - if ctx is set will return true if object is found and updated + */ + bool find_and_update(const K& key, V *value, UpdateContext *ctx); void add(const K& key, V& value); void erase(const K& key); }; template <class K, class V> -bool lru_map<K, V>::find(const K& key, V& value) +bool lru_map<K, V>::_find(const K& key, V *value, UpdateContext *ctx) { - lock.Lock(); typename std::map<K, entry>::iterator iter = entries.find(key); if (iter == entries.end()) { - lock.Unlock(); return false; } entry& e = iter->second; entries_lru.erase(e.lru_iter); - value = e.value; + bool r = true; + + if (ctx) + r = ctx->update(&e.value); + + if (value) + *value = e.value; entries_lru.push_front(key); e.lru_iter = entries_lru.begin(); - lock.Unlock(); + return r; +} - return true; +template <class K, class V> +bool lru_map<K, V>::find(const K& key, V& value) +{ + Mutex::Locker l(lock); + return _find(key, &value, NULL); } template <class K, class V> -void lru_map<K, V>::add(const K& key, V& value) +bool lru_map<K, V>::find_and_update(const K& key, V *value, UpdateContext *ctx) +{ + Mutex::Locker l(lock); + return _find(key, value, ctx); +} + +template <class K, class V> +void lru_map<K, V>::_add(const K& key, V& value) { - lock.Lock(); typename std::map<K, entry>::iterator iter = entries.find(key); if (iter != entries.end()) { entry& e = iter->second; @@ -74,8 +109,14 @@ void lru_map<K, V>::add(const K& key, V& value) entries.erase(iter); entries_lru.pop_back(); } - - lock.Unlock(); +} + + +template <class K, class V> +void lru_map<K, V>::add(const K& key, V& value) +{ + Mutex::Locker l(lock); + _add(key, value); } template <class K, class V> diff --git a/src/global/signal_handler.cc b/src/global/signal_handler.cc index ce604fe1e5d..ffdc5402caf 100644 --- a/src/global/signal_handler.cc +++ b/src/global/signal_handler.cc @@ -196,13 +196,13 @@ struct SignalHandler : public Thread { lock.Lock(); int num_fds = 0; fds[num_fds].fd = pipefd[0]; - fds[num_fds].events = POLLIN | POLLOUT | POLLERR; + fds[num_fds].events = POLLIN | POLLERR; fds[num_fds].revents = 0; ++num_fds; for (unsigned i=0; i<32; i++) { if (handlers[i]) { fds[num_fds].fd = handlers[i]->pipefd[0]; - fds[num_fds].events = POLLIN | POLLOUT | POLLERR; + fds[num_fds].events = POLLIN | POLLERR; fds[num_fds].revents = 0; ++num_fds; } diff --git a/src/include/rados/librados.hpp b/src/include/rados/librados.hpp index 3f6d025ff41..c8de9f9df33 100644 --- a/src/include/rados/librados.hpp +++ b/src/include/rados/librados.hpp @@ -789,7 +789,12 @@ namespace librados int cluster_stat(cluster_stat_t& result); int cluster_fsid(std::string *fsid); - /* pool aio */ + /* + * pool aio + * + * It is up to the caller to release the completion handler, even if the pool_create_async() + * and/or pool_delete_async() fails and does not send the async request + */ static PoolAsyncCompletion *pool_async_create_completion(); // -- aio 
-- diff --git a/src/librados/PoolAsyncCompletionImpl.h b/src/librados/PoolAsyncCompletionImpl.h index efb89641466..443b2c23a17 100644 --- a/src/librados/PoolAsyncCompletionImpl.h +++ b/src/librados/PoolAsyncCompletionImpl.h @@ -94,6 +94,9 @@ namespace librados { C_PoolAsync_Safe(PoolAsyncCompletionImpl *_c) : c(_c) { c->get(); } + ~C_PoolAsync_Safe() { + c->put(); + } void finish(int r) { c->lock.Lock(); @@ -109,7 +112,7 @@ namespace librados { c->lock.Lock(); } - c->put_unlock(); + c->lock.Unlock(); } }; } diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index 9dc1229fbb9..0188d418e0d 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -632,7 +632,7 @@ void MDCache::populate_mydir() CDir *dir = strays[i]->get_dirfrag(fg); if (!dir) dir = strays[i]->get_or_open_dirfrag(this, fg); - if (!dir->is_complete()) { + if (dir->get_version() == 0) { dir->fetch(new C_MDS_RetryOpenRoot(this)); return; } @@ -653,6 +653,8 @@ void MDCache::populate_mydir() assert(!open); open = true; mds->queue_waiters(waiting_for_open); + + scan_stray_dir(); } void MDCache::open_foreign_mdsdir(inodeno_t ino, Context *fin) @@ -9135,19 +9137,34 @@ void MDCache::_snaprealm_create_finish(MDRequest *mdr, Mutation *mut, CInode *in // ------------------------------------------------------------------------------- // STRAYS -void MDCache::scan_stray_dir() +struct C_MDC_RetryScanStray : public Context { + MDCache *cache; + dirfrag_t next; + C_MDC_RetryScanStray(MDCache *c, dirfrag_t n) : cache(c), next(n) { } + void finish(int r) { + cache->scan_stray_dir(next); + } +}; + +void MDCache::scan_stray_dir(dirfrag_t next) { - dout(10) << "scan_stray_dir" << dendl; - + dout(10) << "scan_stray_dir " << next << dendl; + list<CDir*> ls; for (int i = 0; i < NUM_STRAY; ++i) { - if (strays[i]) { - strays[i]->get_dirfrags(ls); - } + if (strays[i]->ino() < next.ino) + continue; + strays[i]->get_dirfrags(ls); } for (list<CDir*>::iterator p = ls.begin(); p != ls.end(); ++p) { CDir *dir = *p; + if (dir->dirfrag() < next) + continue; + if (!dir->is_complete()) { + dir->fetch(new C_MDC_RetryScanStray(this, dir->dirfrag())); + return; + } for (CDir::map_t::iterator q = dir->items.begin(); q != dir->items.end(); ++q) { CDentry *dn = q->second; CDentry::linkage_t *dnl = dn->get_projected_linkage(); @@ -9354,8 +9371,12 @@ void MDCache::purge_stray(CDentry *dn) if (in->is_file()) { uint64_t period = (uint64_t)in->inode.layout.fl_object_size * (uint64_t)in->inode.layout.fl_stripe_count; - uint64_t cur_max_size = in->inode.get_max_size(); - uint64_t to = MAX(in->inode.size, cur_max_size); + uint64_t to = in->inode.get_max_size(); + to = MAX(in->inode.size, to); + // when truncating a file, the filer does not delete stripe objects that are + // truncated to zero. so we need to purge stripe objects up to the max size + // the file has ever been. 
+ to = MAX(in->inode.max_size_ever, to); if (to && period) { uint64_t num = (to + period - 1) / period; dout(10) << "purge_stray 0~" << to << " objects 0~" << num diff --git a/src/mds/MDCache.h b/src/mds/MDCache.h index d8f2a9486fb..416c6454292 100644 --- a/src/mds/MDCache.h +++ b/src/mds/MDCache.h @@ -870,7 +870,6 @@ public: public: elist<CDentry*> delayed_eval_stray; - void scan_stray_dir(); void eval_stray(CDentry *dn, bool delay=false); void eval_remote(CDentry *dn); @@ -884,11 +883,13 @@ public: eval_stray(dn, delay); } protected: + void scan_stray_dir(dirfrag_t next=dirfrag_t()); void fetch_backtrace(inodeno_t ino, int64_t pool, bufferlist& bl, Context *fin); void purge_stray(CDentry *dn); void _purge_stray_purged(CDentry *dn, int r=0); void _purge_stray_logged(CDentry *dn, version_t pdv, LogSegment *ls); void _purge_stray_logged_truncate(CDentry *dn, LogSegment *ls); + friend class C_MDC_RetryScanStray; friend class C_MDC_FetchedBacktrace; friend class C_MDC_PurgeStrayLogged; friend class C_MDC_PurgeStrayLoggedTruncate; diff --git a/src/mds/MDS.cc b/src/mds/MDS.cc index c2e0bbbe369..83722274981 100644 --- a/src/mds/MDS.cc +++ b/src/mds/MDS.cc @@ -1525,7 +1525,6 @@ void MDS::active_start() mdcache->open_root(); mdcache->clean_open_file_lists(); - mdcache->scan_stray_dir(); mdcache->export_remaining_imported_caps(); finish_contexts(g_ceph_context, waiting_for_replay); // kick waiters finish_contexts(g_ceph_context, waiting_for_active); // kick waiters diff --git a/src/mds/Server.cc b/src/mds/Server.cc index 869f3773441..41862847e27 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -3086,6 +3086,7 @@ void Server::handle_client_file_readlock(MDRequest *mdr) checking_lock.length = req->head.args.filelock_change.length; checking_lock.client = req->get_orig_source().num(); checking_lock.pid = req->head.args.filelock_change.pid; + checking_lock.pid_namespace = req->head.args.filelock_change.pid_namespace; checking_lock.type = req->head.args.filelock_change.type; // get the appropriate lock state diff --git a/src/mds/flock.h b/src/mds/flock.h index ae93d1660f0..b767fe58507 100644 --- a/src/mds/flock.h +++ b/src/mds/flock.h @@ -12,7 +12,7 @@ inline ostream& operator<<(ostream& out, ceph_filelock& l) { out << "start: " << l.start << ", length: " << l.length << ", client: " << l.client << ", pid: " << l.pid - << ", type: " << (int)l.type + << ", pid_ns: " << l.pid_namespace << ", type: " << (int)l.type << std::endl; return out; } diff --git a/src/mds/mdstypes.cc b/src/mds/mdstypes.cc index 6886786f27e..362f74774c4 100644 --- a/src/mds/mdstypes.cc +++ b/src/mds/mdstypes.cc @@ -204,7 +204,7 @@ ostream& operator<<(ostream& out, const client_writeable_range_t& r) */ void inode_t::encode(bufferlist &bl) const { - ENCODE_START(7, 6, bl); + ENCODE_START(8, 6, bl); ::encode(ino, bl); ::encode(rdev, bl); @@ -238,6 +238,7 @@ void inode_t::encode(bufferlist &bl) const ::encode(xattr_version, bl); ::encode(backtrace_version, bl); ::encode(old_pools, bl); + ::encode(max_size_ever, bl); ENCODE_FINISH(bl); } @@ -294,6 +295,8 @@ void inode_t::decode(bufferlist::iterator &p) ::decode(backtrace_version, p); if (struct_v >= 7) ::decode(old_pools, p); + if (struct_v >= 8) + ::decode(max_size_ever, p); DECODE_FINISH(p); } diff --git a/src/mds/mdstypes.h b/src/mds/mdstypes.h index 2a3874818b7..bd53c85b48d 100644 --- a/src/mds/mdstypes.h +++ b/src/mds/mdstypes.h @@ -329,6 +329,7 @@ struct inode_t { ceph_file_layout layout; vector <int64_t> old_pools; uint64_t size; // on directory, # dentries + uint64_t 
max_size_ever; // max size the file has ever been uint32_t truncate_seq; uint64_t truncate_size, truncate_from; uint32_t truncate_pending; @@ -353,7 +354,8 @@ struct inode_t { inode_t() : ino(0), rdev(0), mode(0), uid(0), gid(0), nlink(0), anchored(false), - size(0), truncate_seq(0), truncate_size(0), truncate_from(0), + size(0), max_size_ever(0), + truncate_seq(0), truncate_size(0), truncate_from(0), truncate_pending(0), time_warp_seq(0), version(0), file_data_version(0), xattr_version(0), backtrace_version(0) { @@ -369,6 +371,8 @@ struct inode_t { bool is_truncating() const { return (truncate_pending > 0); } void truncate(uint64_t old_size, uint64_t new_size) { assert(new_size < old_size); + if (old_size > max_size_ever) + max_size_ever = old_size; truncate_from = old_size; size = new_size; rstat.rbytes = new_size; diff --git a/src/mon/MDSMonitor.cc b/src/mon/MDSMonitor.cc index 48c1c99d584..b865c379d1a 100644 --- a/src/mon/MDSMonitor.cc +++ b/src/mon/MDSMonitor.cc @@ -951,21 +951,44 @@ bool MDSMonitor::prepare_command(MMonCommand *m) } } } else if (prefix == "mds add_data_pool") { - int64_t poolid; - cmd_getval(g_ceph_context, cmdmap, "poolid", poolid); - pending_mdsmap.add_data_pool(poolid); - ss << "added data pool " << poolid << " to mdsmap"; - r = 0; - - } else if (prefix == "mds remove_data_pool") { - int64_t poolid; - cmd_getval(g_ceph_context, cmdmap, "poolid", poolid); - r = pending_mdsmap.remove_data_pool(poolid); - if (r == -ENOENT) + string poolname; + cmd_getval(g_ceph_context, cmdmap, "pool", poolname); + int64_t poolid = mon->osdmon()->osdmap.lookup_pg_pool_name(poolname); + if (poolid < 0) { + string err; + poolid = strict_strtol(poolname.c_str(), 10, &err); + if (err.length()) { + r = -ENOENT; + poolid = -1; + ss << "pool '" << poolname << "' does not exist"; + } + } + if (poolid >= 0) { + pending_mdsmap.add_data_pool(poolid); + ss << "added data pool " << poolid << " to mdsmap"; r = 0; - if (r == 0) - ss << "removed data pool " << poolid << " from mdsmap"; - + } + } else if (prefix == "mds remove_data_pool") { + string poolname; + cmd_getval(g_ceph_context, cmdmap, "pool", poolname); + int64_t poolid = mon->osdmon()->osdmap.lookup_pg_pool_name(poolname); + if (poolid < 0) { + string err; + poolid = strict_strtol(poolname.c_str(), 10, &err); + if (err.length()) { + r = -ENOENT; + poolid = -1; + ss << "pool '" << poolname << "' does not exist"; + } + } + if (poolid >= 0) { + cmd_getval(g_ceph_context, cmdmap, "poolid", poolid); + r = pending_mdsmap.remove_data_pool(poolid); + if (r == -ENOENT) + r = 0; + if (r == 0) + ss << "removed data pool " << poolid << " from mdsmap"; + } } else if (prefix == "mds newfs") { MDSMap newmap; int64_t metadata, data; diff --git a/src/mon/MonCommands.h b/src/mon/MonCommands.h index 33e00a98d30..ae6bffe0d7d 100644 --- a/src/mon/MonCommands.h +++ b/src/mon/MonCommands.h @@ -284,11 +284,11 @@ COMMAND("mds unset " \ "name=sure,type=CephString,req=false", \ "unset <key>", "mds", "w", "cli,rest") COMMAND("mds add_data_pool " \ - "name=poolid,type=CephInt,range=0", \ - "add data pool <poolid>", "mds", "rw", "cli,rest") + "name=pool,type=CephString", \ + "add data pool <pool>", "mds", "rw", "cli,rest") COMMAND("mds remove_data_pool " \ - "name=poolid,type=CephInt,range=0", \ - "remove data pool <poolid>", "mds", "rw", "cli,rest") + "name=pool,type=CephString", \ + "remove data pool <pool>", "mds", "rw", "cli,rest") COMMAND("mds newfs " \ "name=metadata,type=CephInt,range=0 " \ "name=data,type=CephInt,range=0 " \ @@ -507,8 +507,8 @@ COMMAND("osd 
pool get " \ "get pool parameter <var>", "osd", "r", "cli,rest") COMMAND("osd pool set " \ "name=pool,type=CephPoolname " \ - "name=var,type=CephChoices,strings=size|min_size|crash_replay_interval|pg_num|pgp_num|crush_ruleset " \ - "name=val,type=CephInt", \ + "name=var,type=CephChoices,strings=size|min_size|crash_replay_interval|pg_num|pgp_num|crush_ruleset|hashpspool " \ + "name=val,type=CephString", \ "set pool parameter <var> to <val>", "osd", "rw", "cli,rest") // 'val' is a CephString because it can include a unit. Perhaps // there should be a Python type for validation/conversion of strings diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc index 9144736d801..425375b29e2 100644 --- a/src/mon/OSDMonitor.cc +++ b/src/mon/OSDMonitor.cc @@ -2618,6 +2618,125 @@ void OSDMonitor::parse_loc_map(const vector<string>& args, map<string,string> * } } +int OSDMonitor::prepare_command_pool_set(map<string,cmd_vartype> &cmdmap, + stringstream& ss) +{ + string poolstr; + cmd_getval(g_ceph_context, cmdmap, "pool", poolstr); + int64_t pool = osdmap.lookup_pg_pool_name(poolstr.c_str()); + if (pool < 0) { + ss << "unrecognized pool '" << poolstr << "'"; + return -ENOENT; + } + string var; + cmd_getval(g_ceph_context, cmdmap, "var", var); + + pg_pool_t p = *osdmap.get_pg_pool(pool); + if (pending_inc.new_pools.count(pool)) + p = pending_inc.new_pools[pool]; + + // accept val as a json string or int, and parse out int or float + // values from the string as needed + string val; + cmd_getval(g_ceph_context, cmdmap, "val", val); + string interr; + int64_t n = 0; + if (!cmd_getval(g_ceph_context, cmdmap, "val", n)) + n = strict_strtoll(val.c_str(), 10, &interr); + string floaterr; + float f; + if (!cmd_getval(g_ceph_context, cmdmap, "val", f)) + f = strict_strtod(val.c_str(), &floaterr); + + if (var == "size") { + if (interr.length()) { + ss << "error parsing integer value '" << val << "': " << interr; + return -EINVAL; + } + if (n == 0 || n > 10) { + ss << "pool size must be between 1 and 10"; + return -EINVAL; + } + p.size = n; + if (n < p.min_size) + p.min_size = n; + ss << "set pool " << pool << " size to " << n; + } else if (var == "min_size") { + if (interr.length()) { + ss << "error parsing integer value '" << val << "': " << interr; + return -EINVAL; + } + p.min_size = n; + ss << "set pool " << pool << " min_size to " << n; + } else if (var == "crash_replay_interval") { + if (interr.length()) { + ss << "error parsing integer value '" << val << "': " << interr; + return -EINVAL; + } + p.crash_replay_interval = n; + ss << "set pool " << pool << " to crash_replay_interval to " << n; + } else if (var == "pg_num") { + if (interr.length()) { + ss << "error parsing integer value '" << val << "': " << interr; + return -EINVAL; + } + if (n <= (int)p.get_pg_num()) { + ss << "specified pg_num " << n << " <= current " << p.get_pg_num(); + } else if (!mon->pgmon()->pg_map.creating_pgs.empty()) { + ss << "currently creating pgs, wait"; + return -EAGAIN; + } else { + p.set_pg_num(n); + ss << "set pool " << pool << " pg_num to " << n; + } + } else if (var == "pgp_num") { + if (interr.length()) { + ss << "error parsing integer value '" << val << "': " << interr; + return -EINVAL; + } + if (n > (int)p.get_pg_num()) { + ss << "specified pgp_num " << n << " > pg_num " << p.get_pg_num(); + } else if (!mon->pgmon()->pg_map.creating_pgs.empty()) { + ss << "still creating pgs, wait"; + return -EAGAIN; + } else { + p.set_pgp_num(n); + ss << "set pool " << pool << " pgp_num to " << n; + } + } else if (var == 
"crush_ruleset") { + if (interr.length()) { + ss << "error parsing integer value '" << val << "': " << interr; + return -EINVAL; + } + if (osdmap.crush->rule_exists(n)) { + p.crush_ruleset = n; + ss << "set pool " << pool << " crush_ruleset to " << n; + } else { + ss << "crush ruleset " << n << " does not exist"; + return -ENOENT; + } + } else if (var == "hashpspool") { + if (val == "true") { + p.flags |= pg_pool_t::FLAG_HASHPSPOOL; + ss << "set"; + } else if (val == "false") { + p.flags ^= pg_pool_t::FLAG_HASHPSPOOL; + ss << "unset"; + } else { + ss << "expecting value true or false"; + return -EINVAL; + } + ss << " pool " << pool << " flag hashpspool"; + } else { + ss << "unrecognized variable '" << var << "'"; + return -EINVAL; + } + + p.last_change = pending_inc.epoch; + pending_inc.new_pools[pool] = p; + return 0; +} + bool OSDMonitor::prepare_command(MMonCommand *m) { bool ret = false; @@ -3586,73 +3705,13 @@ done: return true; } } else if (prefix == "osd pool set") { - // set a pool variable to a positive int - string poolstr; - cmd_getval(g_ceph_context, cmdmap, "pool", poolstr); - int64_t pool = osdmap.lookup_pg_pool_name(poolstr.c_str()); - if (pool < 0) { - ss << "unrecognized pool '" << poolstr << "'"; - err = -ENOENT; - } else { - const pg_pool_t *p = osdmap.get_pg_pool(pool); - int64_t n; - cmd_getval(g_ceph_context, cmdmap, "val", n); - string var; - cmd_getval(g_ceph_context, cmdmap, "var", var); - if (var == "size") { - if (n == 0 || n > 10) { - ss << "pool size must be between 1 and 10"; - err = -EINVAL; - goto reply; - } - pending_inc.get_new_pool(pool, p)->size = n; - if (n < p->min_size) - pending_inc.get_new_pool(pool, p)->min_size = n; - ss << "set pool " << pool << " size to " << n; - } else if (var == "min_size") { - pending_inc.get_new_pool(pool, p)->min_size = n; - ss << "set pool " << pool << " min_size to " << n; - } else if (var == "crash_replay_interval") { - pending_inc.get_new_pool(pool, p)->crash_replay_interval = n; - ss << "set pool " << pool << " to crash_replay_interval to " << n; - } else if (var == "pg_num") { - if (n <= p->get_pg_num()) { - ss << "specified pg_num " << n << " <= current " << p->get_pg_num(); - err = -EINVAL; - } else if (!mon->pgmon()->pg_map.creating_pgs.empty()) { - ss << "busy creating pgs; try again later"; - err = -EAGAIN; - } else { - pending_inc.get_new_pool(pool, p)->set_pg_num(n); - ss << "set pool " << pool << " pg_num to " << n; - } - } else if (var == "pgp_num") { - if (n > p->get_pg_num()) { - ss << "specified pgp_num " << n << " > pg_num " << p->get_pg_num(); - } else if (!mon->pgmon()->pg_map.creating_pgs.empty()) { - ss << "busy creating pgs; try again later"; - err = -EAGAIN; - } else { - pending_inc.get_new_pool(pool, p)->set_pgp_num(n); - ss << "set pool " << pool << " pgp_num to " << n; - } - } else if (var == "crush_ruleset") { - if (osdmap.crush->rule_exists(n)) { - pending_inc.get_new_pool(pool, p)->crush_ruleset = n; - ss << "set pool " << pool << " crush_ruleset to " << n; - } else { - ss << "crush ruleset " << n << " does not exist"; - err = -ENOENT; - } - } else { - err = -EINVAL; - goto reply; - } - pending_inc.get_new_pool(pool, p)->last_change = pending_inc.epoch; - getline(ss, rs); - wait_for_finished_proposal(new Monitor::C_Command(mon, m, 0, rs, get_last_committed())); - return true; - } + err = prepare_command_pool_set(cmdmap, ss); + if (err < 0) + goto reply; + + getline(ss, rs); + wait_for_finished_proposal(new Monitor::C_Command(mon, m, 0, rs, get_last_committed())); + return true; } else if 
(prefix == "osd tier add") { string poolstr; cmd_getval(g_ceph_context, cmdmap, "pool", poolstr); diff --git a/src/mon/OSDMonitor.h b/src/mon/OSDMonitor.h index 304f9c4f609..439c8435055 100644 --- a/src/mon/OSDMonitor.h +++ b/src/mon/OSDMonitor.h @@ -324,6 +324,9 @@ private: bool preprocess_command(MMonCommand *m); bool prepare_command(MMonCommand *m); + int prepare_command_pool_set(map<string,cmd_vartype> &cmdmap, + stringstream& ss); + void handle_osd_timeouts(const utime_t &now, std::map<int,utime_t> &last_osd_report); void mark_all_down(); diff --git a/src/os/FileStore.cc b/src/os/FileStore.cc index 3506c4a4ccd..6940dff1405 100644 --- a/src/os/FileStore.cc +++ b/src/os/FileStore.cc @@ -201,7 +201,9 @@ int FileStore::lfn_open(coll_t cid, IndexedPath *path, Index *index) { - assert(get_allow_sharded_objects() || oid.shard_id == ghobject_t::NO_SHARD); + assert(get_allow_sharded_objects() || + ( oid.shard_id == ghobject_t::NO_SHARD && + oid.generation == ghobject_t::NO_GEN )); assert(outfd); int flags = O_RDWR; if (create) @@ -2585,8 +2587,10 @@ int FileStore::fiemap(coll_t cid, const ghobject_t& oid, if (r < 0) goto done; - if (fiemap->fm_mapped_extents == 0) + if (fiemap->fm_mapped_extents == 0) { + free(fiemap); goto done; + } struct fiemap_extent *extent = &fiemap->fm_extents[0]; @@ -2620,6 +2624,7 @@ int FileStore::fiemap(coll_t cid, const ghobject_t& oid, i++; extent++; } + free(fiemap); } done: @@ -2629,7 +2634,6 @@ done: } dout(10) << "fiemap " << cid << "/" << oid << " " << offset << "~" << len << " = " << r << " num_extents=" << exomap.size() << " " << exomap << dendl; - free(fiemap); assert(!m_filestore_fail_eio || r != -EIO); return r; } diff --git a/src/os/GenericFileStoreBackend.cc b/src/os/GenericFileStoreBackend.cc index 81d896a0943..f19ba7d7760 100644 --- a/src/os/GenericFileStoreBackend.cc +++ b/src/os/GenericFileStoreBackend.cc @@ -124,12 +124,12 @@ int GenericFileStoreBackend::detect_features() dout(0) << "detect_features: FIEMAP ioctl is supported and appears to work" << dendl; ioctl_fiemap = true; } + free(fiemap); } if (!m_filestore_fiemap) { dout(0) << "detect_features: FIEMAP ioctl is disabled via 'filestore fiemap' config option" << dendl; ioctl_fiemap = false; } - free(fiemap); ::unlink(fn); TEMP_FAILURE_RETRY(::close(fd)); diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index b2aa2ebbcd2..d450fd543e6 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -3278,13 +3278,15 @@ bool remove_dir( ObjectStore *store, SnapMapper *mapper, OSDriver *osdriver, ObjectStore::Sequencer *osr, - coll_t coll, DeletingStateRef dstate) + coll_t coll, DeletingStateRef dstate, + ThreadPool::TPHandle &handle) { vector<ghobject_t> olist; int64_t num = 0; ObjectStore::Transaction *t = new ObjectStore::Transaction; ghobject_t next; while (!next.is_max()) { + handle.reset_tp_timeout(); store->collection_list_partial( coll, next, @@ -3306,7 +3308,9 @@ bool remove_dir( C_SaferCond waiter; store->queue_transaction(osr, t, &waiter); bool cont = dstate->pause_clearing(); + handle.suspend_tp_timeout(); waiter.wait(); + handle.reset_tp_timeout(); if (cont) cont = dstate->resume_clearing(); delete t; @@ -3322,14 +3326,18 @@ bool remove_dir( C_SaferCond waiter; store->queue_transaction(osr, t, &waiter); bool cont = dstate->pause_clearing(); + handle.suspend_tp_timeout(); waiter.wait(); + handle.reset_tp_timeout(); if (cont) cont = dstate->resume_clearing(); delete t; return cont; } -void OSD::RemoveWQ::_process(pair<PGRef, DeletingStateRef> item) +void OSD::RemoveWQ::_process( + pair<PGRef, 
DeletingStateRef> item, + ThreadPool::TPHandle &handle) { PGRef pg(item.first); SnapMapper &mapper = pg->snap_mapper; @@ -3346,7 +3354,8 @@ void OSD::RemoveWQ::_process(pair<PGRef, DeletingStateRef> item) i != colls_to_remove.end(); ++i) { bool cont = remove_dir( - pg->cct, store, &mapper, &driver, pg->osr.get(), *i, item.second); + pg->cct, store, &mapper, &driver, pg->osr.get(), *i, item.second, + handle); if (!cont) return; } diff --git a/src/osd/OSD.h b/src/osd/OSD.h index 9346cee6890..f7559da3be5 100644 --- a/src/osd/OSD.h +++ b/src/osd/OSD.h @@ -1681,7 +1681,7 @@ protected: remove_queue.pop_front(); return item; } - void _process(pair<PGRef, DeletingStateRef>); + void _process(pair<PGRef, DeletingStateRef>, ThreadPool::TPHandle &); void _clear() { remove_queue.clear(); } diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc index 27f7b171677..1a9dde665cf 100644 --- a/src/osd/osd_types.cc +++ b/src/osd/osd_types.cc @@ -655,6 +655,7 @@ void pool_snap_info_t::generate_test_instances(list<pool_snap_info_t*>& o) void pg_pool_t::dump(Formatter *f) const { f->dump_unsigned("flags", get_flags()); + f->dump_string("flags_names", get_flags_string()); f->dump_int("type", get_type()); f->dump_int("size", get_size()); f->dump_int("min_size", get_min_size()); @@ -1054,7 +1055,7 @@ ostream& operator<<(ostream& out, const pg_pool_t& p) << " last_change " << p.get_last_change() << " owner " << p.get_auid(); if (p.flags) - out << " flags " << p.flags; + out << " flags " << p.get_flags_string(); if (p.crash_replay_interval) out << " crash_replay_interval " << p.crash_replay_interval; if (p.quota_max_bytes) diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index a54fc65f375..8ceeb539c1a 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -725,6 +725,28 @@ struct pg_pool_t { FLAG_FULL = 2, // pool is full }; + static const char *get_flag_name(int f) { + switch (f) { + case FLAG_HASHPSPOOL: return "hashpspool"; + case FLAG_FULL: return "full"; + default: return "???"; + } + } + static string get_flags_string(uint64_t f) { + string s; + for (unsigned n=0; f && n<64; ++n) { + if (f & (1ull << n)) { + if (s.length()) + s += ","; + s += get_flag_name(1ull << n); + } + } + return s; + } + string get_flags_string() const { + return get_flags_string(flags); + } + typedef enum { CACHEMODE_NONE = 0, ///< no caching CACHEMODE_WRITEBACK = 1, ///< write to cache, flush later diff --git a/src/osdc/Objecter.h b/src/osdc/Objecter.h index 1196633276d..938c97a4f31 100644 --- a/src/osdc/Objecter.h +++ b/src/osdc/Objecter.h @@ -386,7 +386,6 @@ struct ObjectOperation { pwatchers->push_back(ow); } } - *prval = 0; } catch (buffer::error& e) { if (prval) @@ -424,8 +423,6 @@ struct ObjectOperation { } psnaps->seq = resp.seq; } - if (prval) - *prval = 0; } catch (buffer::error& e) { if (prval) @@ -617,10 +614,9 @@ struct ObjectOperation { } ::decode(*cursor, p); } catch (buffer::error& e) { - r = -EIO; + if (prval) + *prval = -EIO; } - if (prval) - *prval = r; } }; @@ -664,10 +660,9 @@ struct ObjectOperation { if (pisdirty) *pisdirty = isdirty; } catch (buffer::error& e) { - r = -EIO; + if (prval) + *prval = -EIO; } - if (prval) - *prval = r; } }; diff --git a/src/rgw/Makefile.am b/src/rgw/Makefile.am index 24060b52e25..b92c35e08d6 100644 --- a/src/rgw/Makefile.am +++ b/src/rgw/Makefile.am @@ -31,7 +31,8 @@ librgw_la_SOURCES = \ rgw/rgw_auth_s3.cc \ rgw/rgw_metadata.cc \ rgw/rgw_replica_log.cc \ - rgw/rgw_keystone.cc + rgw/rgw_keystone.cc \ + rgw/rgw_quota.cc librgw_la_CXXFLAGS = -Woverloaded-virtual 
${AM_CXXFLAGS} noinst_LTLIBRARIES += librgw.la @@ -124,6 +125,7 @@ noinst_HEADERS += \ rgw/rgw_http_client.h \ rgw/rgw_swift.h \ rgw/rgw_swift_auth.h \ + rgw/rgw_quota.h \ rgw/rgw_rados.h \ rgw/rgw_replica_log.h \ rgw/rgw_resolve.h \ diff --git a/src/rgw/rgw_admin.cc b/src/rgw/rgw_admin.cc index 81abb231b6f..b23bf3ba5d4 100644 --- a/src/rgw/rgw_admin.cc +++ b/src/rgw/rgw_admin.cc @@ -62,6 +62,9 @@ void _usage() cerr << " bucket check check bucket index\n"; cerr << " object rm remove object\n"; cerr << " object unlink unlink object from bucket index\n"; + cerr << " quota set set quota params\n"; + cerr << " quota enable enable quota\n"; + cerr << " quota disable disable quota\n"; cerr << " region get show region info\n"; cerr << " regions list list all regions set on this cluster\n"; cerr << " region set set region info (requires infile)\n"; @@ -154,6 +157,11 @@ void _usage() cerr << " --yes-i-really-mean-it required for certain operations\n"; cerr << "\n"; cerr << "<date> := \"YYYY-MM-DD[ hh:mm:ss]\"\n"; + cerr << "\nQuota options:\n"; + cerr << " --bucket specified bucket for quota command\n"; + cerr << " --max-objects specify max objects\n"; + cerr << " --max-size specify max size (in bytes)\n"; + cerr << " --quota-scope scope of quota (bucket, user)\n"; cerr << "\n"; generic_client_usage(); } @@ -203,6 +211,9 @@ enum { OPT_OBJECT_RM, OPT_OBJECT_UNLINK, OPT_OBJECT_STAT, + OPT_QUOTA_SET, + OPT_QUOTA_ENABLE, + OPT_QUOTA_DISABLE, OPT_GC_LIST, OPT_GC_PROCESS, OPT_REGION_GET, @@ -253,6 +264,7 @@ static int get_cmd(const char *cmd, const char *prev_cmd, bool *need_more) strcmp(cmd, "opstate") == 0 || strcmp(cmd, "pool") == 0 || strcmp(cmd, "pools") == 0 || + strcmp(cmd, "quota") == 0 || strcmp(cmd, "region") == 0 || strcmp(cmd, "regions") == 0 || strcmp(cmd, "region-map") == 0 || @@ -362,6 +374,13 @@ static int get_cmd(const char *cmd, const char *prev_cmd, bool *need_more) return OPT_REGION_SET; if (strcmp(cmd, "default") == 0) return OPT_REGION_DEFAULT; + } else if (strcmp(prev_cmd, "quota") == 0) { + if (strcmp(cmd, "set") == 0) + return OPT_QUOTA_SET; + if (strcmp(cmd, "enable") == 0) + return OPT_QUOTA_ENABLE; + if (strcmp(cmd, "disable") == 0) + return OPT_QUOTA_DISABLE; } else if (strcmp(prev_cmd, "regions") == 0) { if (strcmp(cmd, "list") == 0) return OPT_REGION_LIST; @@ -660,6 +679,64 @@ static bool dump_string(const char *field_name, bufferlist& bl, Formatter *f) return true; } +void set_quota_info(RGWQuotaInfo& quota, int opt_cmd, int64_t max_size, int64_t max_objects) +{ + switch (opt_cmd) { + case OPT_QUOTA_ENABLE: + quota.enabled = true; + + // falling through on purpose + + case OPT_QUOTA_SET: + if (max_objects >= 0) { + quota.max_objects = max_objects; + } + if (max_size >= 0) { + quota.max_size_kb = rgw_rounded_kb(max_size); + } + break; + case OPT_QUOTA_DISABLE: + quota.enabled = false; + break; + } +} + +int set_bucket_quota(RGWRados *store, int opt_cmd, string& bucket_name, int64_t max_size, int64_t max_objects) +{ + RGWBucketInfo bucket_info; + map<string, bufferlist> attrs; + int r = store->get_bucket_info(NULL, bucket_name, bucket_info, NULL, &attrs); + if (r < 0) { + cerr << "could not get bucket info for bucket=" << bucket_name << ": " << cpp_strerror(-r) << std::endl; + return -r; + } + + set_quota_info(bucket_info.quota, opt_cmd, max_size, max_objects); + + r = store->put_bucket_instance_info(bucket_info, false, 0, &attrs); + if (r < 0) { + cerr << "ERROR: failed writing bucket instance info: " << cpp_strerror(-r) << std::endl; + return -r; + } + return 0; +} + +int 
set_user_bucket_quota(int opt_cmd, RGWUser& user, RGWUserAdminOpState& op_state, int64_t max_size, int64_t max_objects) +{ + RGWUserInfo& user_info = op_state.get_user_info(); + + set_quota_info(user_info.bucket_quota, opt_cmd, max_size, max_objects); + + op_state.set_bucket_quota(user_info.bucket_quota); + + string err; + int r = user.modify(op_state, &err); + if (r < 0) { + cerr << "ERROR: failed updating user info: " << cpp_strerror(-r) << ": " << err << std::endl; + return -r; + } + return 0; +} int main(int argc, char **argv) { @@ -721,6 +798,10 @@ int main(int argc, char **argv) string replica_log_type_str; ReplicaLogType replica_log_type = ReplicaLog_Invalid; string op_mask_str; + string quota_scope; + + int64_t max_objects = -1; + int64_t max_size = -1; std::string val; std::ostringstream errs; @@ -788,6 +869,10 @@ int main(int argc, char **argv) max_buckets = atoi(val.c_str()); } else if (ceph_argparse_witharg(args, i, &val, "--max-entries", (char*)NULL)) { max_entries = atoi(val.c_str()); + } else if (ceph_argparse_witharg(args, i, &val, "--max-size", (char*)NULL)) { + max_size = (int64_t)atoll(val.c_str()); + } else if (ceph_argparse_witharg(args, i, &val, "--max-objects", (char*)NULL)) { + max_objects = (int64_t)atoll(val.c_str()); } else if (ceph_argparse_witharg(args, i, &val, "--date", "--time", (char*)NULL)) { date = val; if (end_date.empty()) @@ -848,6 +933,8 @@ int main(int argc, char **argv) start_marker = val; } else if (ceph_argparse_witharg(args, i, &val, "--end-marker", (char*)NULL)) { end_marker = val; + } else if (ceph_argparse_witharg(args, i, &val, "--quota-scope", (char*)NULL)) { + quota_scope = val; } else if (ceph_argparse_witharg(args, i, &val, "--replica-log-type", (char*)NULL)) { replica_log_type_str = val; replica_log_type = get_replicalog_type(replica_log_type_str); @@ -2228,5 +2315,28 @@ next: return -ret; } } + + bool quota_op = (opt_cmd == OPT_QUOTA_SET || opt_cmd == OPT_QUOTA_ENABLE || opt_cmd == OPT_QUOTA_DISABLE); + + if (quota_op) { + if (bucket_name.empty() && user_id.empty()) { + cerr << "ERROR: bucket name or uid is required for quota operation" << std::endl; + return EINVAL; + } + + if (!bucket_name.empty()) { + if (!quota_scope.empty() && quota_scope != "bucket") { + cerr << "ERROR: invalid quota scope specification." << std::endl; + return EINVAL; + } + set_bucket_quota(store, opt_cmd, bucket_name, max_size, max_objects); + } else if (!user_id.empty()) { + if (quota_scope != "bucket") { + cerr << "ERROR: only bucket-level user quota can be handled. 
Please specify --quota-scope=bucket" << std::endl; + return EINVAL; + } + set_user_bucket_quota(opt_cmd, user, user_op, max_size, max_objects); + } + } return 0; } diff --git a/src/rgw/rgw_bucket.cc b/src/rgw/rgw_bucket.cc index 5356417f09a..3267bc51948 100644 --- a/src/rgw/rgw_bucket.cc +++ b/src/rgw/rgw_bucket.cc @@ -901,6 +901,7 @@ static int bucket_stats(RGWRados *store, std::string& bucket_name, Formatter *f formatter->dump_int("mtime", mtime); formatter->dump_string("max_marker", max_marker); dump_bucket_usage(stats, formatter); + encode_json("bucket_quota", bucket_info.quota, formatter); formatter->close_section(); return 0; diff --git a/src/rgw/rgw_common.h b/src/rgw/rgw_common.h index 2c7c0c716be..baf60001a8b 100644 --- a/src/rgw/rgw_common.h +++ b/src/rgw/rgw_common.h @@ -29,6 +29,7 @@ #include "include/utime.h" #include "rgw_acl.h" #include "rgw_cors.h" +#include "rgw_quota.h" #include "cls/version/cls_version_types.h" #include "include/rados/librados.hpp" @@ -90,6 +91,7 @@ using ceph::crypto::MD5; #define RGW_OP_TYPE_WRITE 0x02 #define RGW_OP_TYPE_DELETE 0x04 +#define RGW_OP_TYPE_MODIFY (RGW_OP_TYPE_WRITE | RGW_OP_TYPE_DELETE) #define RGW_OP_TYPE_ALL (RGW_OP_TYPE_READ | RGW_OP_TYPE_WRITE | RGW_OP_TYPE_DELETE) #define RGW_DEFAULT_MAX_BUCKETS 1000 @@ -128,6 +130,7 @@ using ceph::crypto::MD5; #define ERR_NOT_FOUND 2023 #define ERR_PERMANENT_REDIRECT 2024 #define ERR_LOCKED 2025 +#define ERR_QUOTA_EXCEEDED 2026 #define ERR_USER_SUSPENDED 2100 #define ERR_INTERNAL_ERROR 2200 @@ -423,11 +426,12 @@ struct RGWUserInfo __u8 system; string default_placement; list<string> placement_tags; + RGWQuotaInfo bucket_quota; RGWUserInfo() : auid(0), suspended(0), max_buckets(RGW_DEFAULT_MAX_BUCKETS), op_mask(RGW_OP_TYPE_ALL), system(0) {} void encode(bufferlist& bl) const { - ENCODE_START(13, 9, bl); + ENCODE_START(14, 9, bl); ::encode(auid, bl); string access_key; string secret_key; @@ -462,6 +466,7 @@ struct RGWUserInfo ::encode(system, bl); ::encode(default_placement, bl); ::encode(placement_tags, bl); + ::encode(bucket_quota, bl); ENCODE_FINISH(bl); } void decode(bufferlist::iterator& bl) { @@ -518,6 +523,9 @@ struct RGWUserInfo ::decode(default_placement, bl); ::decode(placement_tags, bl); /* tags of allowed placement rules */ } + if (struct_v >= 14) { + ::decode(bucket_quota, bl); + } DECODE_FINISH(bl); } void dump(Formatter *f) const; @@ -599,6 +607,10 @@ struct rgw_bucket { void dump(Formatter *f) const; void decode_json(JSONObj *obj); static void generate_test_instances(list<rgw_bucket*>& o); + + bool operator<(const rgw_bucket& b) const { + return name.compare(b.name) < 0; + } }; WRITE_CLASS_ENCODER(rgw_bucket) @@ -661,9 +673,10 @@ struct RGWBucketInfo bool has_instance_obj; RGWObjVersionTracker objv_tracker; /* we don't need to serialize this, for runtime tracking */ obj_version ep_objv; /* entry point object version, for runtime tracking only */ + RGWQuotaInfo quota; void encode(bufferlist& bl) const { - ENCODE_START(8, 4, bl); + ENCODE_START(9, 4, bl); ::encode(bucket, bl); ::encode(owner, bl); ::encode(flags, bl); @@ -672,6 +685,7 @@ struct RGWBucketInfo ::encode(ct, bl); ::encode(placement_rule, bl); ::encode(has_instance_obj, bl); + ::encode(quota, bl); ENCODE_FINISH(bl); } void decode(bufferlist::iterator& bl) { @@ -692,6 +706,8 @@ struct RGWBucketInfo ::decode(placement_rule, bl); if (struct_v >= 8) ::decode(has_instance_obj, bl); + if (struct_v >= 9) + ::decode(quota, bl); DECODE_FINISH(bl); } void dump(Formatter *f) const; @@ -754,6 +770,8 @@ struct RGWBucketStats uint64_t 
num_kb; uint64_t num_kb_rounded; uint64_t num_objects; + + RGWBucketStats() : num_kb(0), num_kb_rounded(0), num_objects(0) {} }; struct req_state; @@ -1213,6 +1231,11 @@ static inline const char *rgw_obj_category_name(RGWObjCategory category) return "unknown"; } +static inline uint64_t rgw_rounded_kb(uint64_t bytes) +{ + return (bytes + 1023) / 1024; +} + extern string rgw_string_unquote(const string& s); extern void parse_csv_string(const string& ival, vector<string>& ovals); extern int parse_key_value(string& in_str, string& key, string& val); diff --git a/src/rgw/rgw_http_errors.h b/src/rgw/rgw_http_errors.h index 6cb9fabf6c0..ba3e522651f 100644 --- a/src/rgw/rgw_http_errors.h +++ b/src/rgw/rgw_http_errors.h @@ -36,6 +36,7 @@ const static struct rgw_http_errors RGW_HTTP_ERRORS[] = { { EPERM, 403, "AccessDenied" }, { ERR_USER_SUSPENDED, 403, "UserSuspended" }, { ERR_REQUEST_TIME_SKEWED, 403, "RequestTimeTooSkewed" }, + { ERR_QUOTA_EXCEEDED, 403, "QuotaExceeded" }, { ENOENT, 404, "NoSuchKey" }, { ERR_NO_SUCH_BUCKET, 404, "NoSuchBucket" }, { ERR_NO_SUCH_UPLOAD, 404, "NoSuchUpload" }, diff --git a/src/rgw/rgw_json_enc.cc b/src/rgw/rgw_json_enc.cc index 189e9ae961e..4d6b25374b9 100644 --- a/src/rgw/rgw_json_enc.cc +++ b/src/rgw/rgw_json_enc.cc @@ -396,6 +396,7 @@ void RGWUserInfo::dump(Formatter *f) const } encode_json("default_placement", default_placement, f); encode_json("placement_tags", placement_tags, f); + encode_json("bucket_quota", bucket_quota, f); } @@ -446,6 +447,21 @@ void RGWUserInfo::decode_json(JSONObj *obj) system = (__u8)sys; JSONDecoder::decode_json("default_placement", default_placement, obj); JSONDecoder::decode_json("placement_tags", placement_tags, obj); + JSONDecoder::decode_json("bucket_quota", bucket_quota, obj); +} + +void RGWQuotaInfo::dump(Formatter *f) const +{ + f->dump_bool("enabled", enabled); + f->dump_int("max_size_kb", max_size_kb); + f->dump_int("max_objects", max_objects); +} + +void RGWQuotaInfo::decode_json(JSONObj *obj) +{ + JSONDecoder::decode_json("max_size_kb", max_size_kb, obj); + JSONDecoder::decode_json("max_objects", max_objects, obj); + JSONDecoder::decode_json("enabled", enabled, obj); } void rgw_bucket::dump(Formatter *f) const @@ -497,6 +513,7 @@ void RGWBucketInfo::dump(Formatter *f) const encode_json("region", region, f); encode_json("placement_rule", placement_rule, f); encode_json("has_instance_obj", has_instance_obj, f); + encode_json("quota", quota, f); } void RGWBucketInfo::decode_json(JSONObj *obj) { @@ -507,6 +524,7 @@ void RGWBucketInfo::decode_json(JSONObj *obj) { JSONDecoder::decode_json("region", region, obj); JSONDecoder::decode_json("placement_rule", placement_rule, obj); JSONDecoder::decode_json("has_instance_obj", has_instance_obj, obj); + JSONDecoder::decode_json("quota", quota, obj); } void RGWObjEnt::dump(Formatter *f) const @@ -673,12 +691,14 @@ void RGWRegionMap::dump(Formatter *f) const { encode_json("regions", regions, f); encode_json("master_region", master_region, f); + encode_json("bucket_quota", bucket_quota, f); } void RGWRegionMap::decode_json(JSONObj *obj) { JSONDecoder::decode_json("regions", regions, obj); JSONDecoder::decode_json("master_region", master_region, obj); + JSONDecoder::decode_json("bucket_quota", bucket_quota, obj); } void RGWMetadataLogInfo::dump(Formatter *f) const diff --git a/src/rgw/rgw_main.cc b/src/rgw/rgw_main.cc index 54db609521c..acaa5deffee 100644 --- a/src/rgw/rgw_main.cc +++ b/src/rgw/rgw_main.cc @@ -357,6 +357,13 @@ void RGWProcess::handle_request(RGWRequest *req) goto done; } + 
req->log(s, "init op"); + ret = op->init_processing(); + if (ret < 0) { + abort_early(s, op, ret); + goto done; + } + req->log(s, "verifying op mask"); ret = op->verify_op_mask(); if (ret < 0) { diff --git a/src/rgw/rgw_metadata.cc b/src/rgw/rgw_metadata.cc index ca5ad3f2e7a..23f73e26531 100644 --- a/src/rgw/rgw_metadata.cc +++ b/src/rgw/rgw_metadata.cc @@ -1,7 +1,7 @@ -#include "rgw_metadata.h" #include "common/ceph_json.h" +#include "rgw_metadata.h" #include "cls/version/cls_version_types.h" #include "rgw_rados.h" diff --git a/src/rgw/rgw_op.cc b/src/rgw/rgw_op.cc index 114b8709a22..2e07e3fcde6 100644 --- a/src/rgw/rgw_op.cc +++ b/src/rgw/rgw_op.cc @@ -421,6 +421,47 @@ int RGWOp::verify_op_mask() return 0; } +int RGWOp::init_quota() +{ + /* no quota enforcement for system requests */ + if (s->system_request) + return 0; + + /* init quota related stuff */ + if (!(s->user.op_mask & RGW_OP_TYPE_MODIFY)) { + return 0; + } + + /* only interested in object related ops */ + if (s->object_str.empty()) { + return 0; + } + + if (s->bucket_info.quota.enabled) { + bucket_quota = s->bucket_info.quota; + return 0; + } + if (s->user.user_id == s->bucket_owner.get_id()) { + if (s->user.bucket_quota.enabled) { + bucket_quota = s->user.bucket_quota; + return 0; + } + } else { + RGWUserInfo owner_info; + int r = rgw_get_user_info_by_uid(store, s->bucket_info.owner, owner_info); + if (r < 0) + return r; + + if (owner_info.bucket_quota.enabled) { + bucket_quota = owner_info.bucket_quota; + return 0; + } + } + + bucket_quota = store->region_map.bucket_quota; + return 0; +} + static bool validate_cors_rule_method(RGWCORSRule *rule, const char *req_meth) { uint8_t flags = 0; if (strcmp(req_meth, "GET") == 0) flags = RGW_CORS_GET; @@ -1363,6 +1404,14 @@ void RGWPutObj::execute() ldout(s->cct, 15) << "supplied_md5=" << supplied_md5 << dendl; } + if (!chunked_upload) { /* with chunked upload we don't know how big is the upload. 
+ we also check sizes at the end anyway */ + ret = store->check_quota(s->bucket, bucket_quota, s->content_length); + if (ret < 0) { + goto done; + } + } + if (supplied_etag) { strncpy(supplied_md5, supplied_etag, sizeof(supplied_md5) - 1); supplied_md5[sizeof(supplied_md5) - 1] = '\0'; @@ -1407,6 +1456,11 @@ void RGWPutObj::execute() s->obj_size = ofs; perfcounter->inc(l_rgw_put_b, s->obj_size); + ret = store->check_quota(s->bucket, bucket_quota, s->obj_size); + if (ret < 0) { + goto done; + } + hash.Final(m); buf_to_hex(m, CEPH_CRYPTO_MD5_DIGESTSIZE, calc_md5); @@ -1604,6 +1658,13 @@ void RGWPutMetadata::execute() } } + map<string, string>::iterator giter; + for (giter = s->generic_attrs.begin(); giter != s->generic_attrs.end(); ++giter) { + bufferlist& attrbl = attrs[giter->first]; + const string& val = giter->second; + attrbl.append(val.c_str(), val.size() + 1); + } + if (has_policy) { policy.encode(bl); attrs[RGW_ATTR_ACL] = bl; diff --git a/src/rgw/rgw_op.h b/src/rgw/rgw_op.h index 948a11830c2..eee5ea99065 100644 --- a/src/rgw/rgw_op.h +++ b/src/rgw/rgw_op.h @@ -20,6 +20,7 @@ #include "rgw_bucket.h" #include "rgw_acl.h" #include "rgw_cors.h" +#include "rgw_quota.h" using namespace std; @@ -36,10 +37,21 @@ protected: RGWRados *store; RGWCORSConfiguration bucket_cors; bool cors_exist; + RGWQuotaInfo bucket_quota; + + virtual int init_quota(); public: RGWOp() : s(NULL), dialect_handler(NULL), store(NULL), cors_exist(false) {} virtual ~RGWOp() {} + virtual int init_processing() { + int ret = init_quota(); + if (ret < 0) + return ret; + + return 0; + } + virtual void init(RGWRados *store, struct req_state *s, RGWHandler *dialect_handler) { this->store = store; this->s = s; diff --git a/src/rgw/rgw_quota.cc b/src/rgw/rgw_quota.cc new file mode 100644 index 00000000000..66609ca723c --- /dev/null +++ b/src/rgw/rgw_quota.cc @@ -0,0 +1,332 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2013 Inktank, Inc + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. 
+ * + */ + + +#include "include/utime.h" +#include "common/lru_map.h" +#include "common/RefCountedObj.h" + +#include "rgw_common.h" +#include "rgw_rados.h" +#include "rgw_quota.h" + +#define dout_subsys ceph_subsys_rgw + + +struct RGWQuotaBucketStats { + RGWBucketStats stats; + utime_t expiration; + utime_t async_refresh_time; +}; + +class RGWBucketStatsCache { + RGWRados *store; + lru_map<rgw_bucket, RGWQuotaBucketStats> stats_map; + RefCountedWaitObject *async_refcount; + + int fetch_bucket_totals(rgw_bucket& bucket, RGWBucketStats& stats); + +public: + RGWBucketStatsCache(RGWRados *_store) : store(_store), stats_map(store->ctx()->_conf->rgw_bucket_quota_cache_size) { + async_refcount = new RefCountedWaitObject; + } + ~RGWBucketStatsCache() { + async_refcount->put_wait(); /* wait for all pending async requests to complete */ + } + + int get_bucket_stats(rgw_bucket& bucket, RGWBucketStats& stats, RGWQuotaInfo& quota); + void adjust_bucket_stats(rgw_bucket& bucket, int objs_delta, uint64_t added_bytes, uint64_t removed_bytes); + + bool can_use_cached_stats(RGWQuotaInfo& quota, RGWBucketStats& stats); + + void set_stats(rgw_bucket& bucket, RGWQuotaBucketStats& qs, RGWBucketStats& stats); + int async_refresh(rgw_bucket& bucket, RGWQuotaBucketStats& qs); + void async_refresh_response(rgw_bucket& bucket, RGWBucketStats& stats); +}; + +bool RGWBucketStatsCache::can_use_cached_stats(RGWQuotaInfo& quota, RGWBucketStats& cached_stats) +{ + if (quota.max_size_kb >= 0) { + if (quota.max_size_soft_threshold < 0) { + quota.max_size_soft_threshold = quota.max_size_kb * store->ctx()->_conf->rgw_bucket_quota_soft_threshold; + } + + if (cached_stats.num_kb_rounded >= (uint64_t)quota.max_size_soft_threshold) { + ldout(store->ctx(), 20) << "quota: can't use cached stats, exceeded soft threshold (size): " + << cached_stats.num_kb_rounded << " >= " << quota.max_size_soft_threshold << dendl; + return false; + } + } + + if (quota.max_objects >= 0) { + if (quota.max_objs_soft_threshold < 0) { + quota.max_objs_soft_threshold = quota.max_objects * store->ctx()->_conf->rgw_bucket_quota_soft_threshold; + } + + if (cached_stats.num_objects >= (uint64_t)quota.max_objs_soft_threshold) { + ldout(store->ctx(), 20) << "quota: can't use cached stats, exceeded soft threshold (num objs): " + << cached_stats.num_objects << " >= " << quota.max_objs_soft_threshold << dendl; + return false; + } + } + + return true; +} + +int RGWBucketStatsCache::fetch_bucket_totals(rgw_bucket& bucket, RGWBucketStats& stats) +{ + RGWBucketInfo bucket_info; + + uint64_t bucket_ver; + uint64_t master_ver; + + map<RGWObjCategory, RGWBucketStats> bucket_stats; + int r = store->get_bucket_stats(bucket, &bucket_ver, &master_ver, bucket_stats, NULL); + if (r < 0) { + ldout(store->ctx(), 0) << "could not get bucket info for bucket=" << bucket.name << dendl; + return r; + } + + stats = RGWBucketStats(); + + map<RGWObjCategory, RGWBucketStats>::iterator iter; + for (iter = bucket_stats.begin(); iter != bucket_stats.end(); ++iter) { + RGWBucketStats& s = iter->second; + stats.num_kb += s.num_kb; + stats.num_kb_rounded += s.num_kb_rounded; + stats.num_objects += s.num_objects; + } + + return 0; +} + +class AsyncRefreshHandler : public RGWGetBucketStats_CB { + RGWRados *store; + RGWBucketStatsCache *cache; +public: + AsyncRefreshHandler(RGWRados *_store, RGWBucketStatsCache *_cache, rgw_bucket& _bucket) : RGWGetBucketStats_CB(_bucket), store(_store), cache(_cache) {} + + int init_fetch(); + + void handle_response(int r); +}; + + +int 
AsyncRefreshHandler::init_fetch() +{ + ldout(store->ctx(), 20) << "initiating async quota refresh for bucket=" << bucket << dendl; + map<RGWObjCategory, RGWBucketStats> bucket_stats; + int r = store->get_bucket_stats_async(bucket, this); + if (r < 0) { + ldout(store->ctx(), 0) << "could not get bucket info for bucket=" << bucket.name << dendl; + + /* get_bucket_stats_async() dropped our reference already */ + return r; + } + + return 0; +} + +void AsyncRefreshHandler::handle_response(int r) +{ + if (r < 0) { + ldout(store->ctx(), 20) << "AsyncRefreshHandler::handle_response() r=" << r << dendl; + return; /* nothing to do here */ + } + + RGWBucketStats bs; + + map<RGWObjCategory, RGWBucketStats>::iterator iter; + for (iter = stats->begin(); iter != stats->end(); ++iter) { + RGWBucketStats& s = iter->second; + bs.num_kb += s.num_kb; + bs.num_kb_rounded += s.num_kb_rounded; + bs.num_objects += s.num_objects; + } + + cache->async_refresh_response(bucket, bs); +} + +class RGWBucketStatsAsyncTestSet : public lru_map<rgw_bucket, RGWQuotaBucketStats>::UpdateContext { + int objs_delta; + uint64_t added_bytes; + uint64_t removed_bytes; +public: + RGWBucketStatsAsyncTestSet() {} + bool update(RGWQuotaBucketStats *entry) { + if (entry->async_refresh_time.sec() == 0) + return false; + + entry->async_refresh_time = utime_t(0, 0); + + return true; + } +}; + +int RGWBucketStatsCache::async_refresh(rgw_bucket& bucket, RGWQuotaBucketStats& qs) +{ + /* protect against multiple updates */ + RGWBucketStatsAsyncTestSet test_update; + if (!stats_map.find_and_update(bucket, NULL, &test_update)) { + /* most likely we just raced with another update */ + return 0; + } + + async_refcount->get(); + + AsyncRefreshHandler *handler = new AsyncRefreshHandler(store, this, bucket); + + int ret = handler->init_fetch(); + if (ret < 0) { + async_refcount->put(); + handler->put(); + return ret; + } + + return 0; +} + +void RGWBucketStatsCache::async_refresh_response(rgw_bucket& bucket, RGWBucketStats& stats) +{ + ldout(store->ctx(), 20) << "async stats refresh response for bucket=" << bucket << dendl; + + RGWQuotaBucketStats qs; + + stats_map.find(bucket, qs); + + set_stats(bucket, qs, stats); + + async_refcount->put(); +} + +void RGWBucketStatsCache::set_stats(rgw_bucket& bucket, RGWQuotaBucketStats& qs, RGWBucketStats& stats) +{ + qs.stats = stats; + qs.expiration = ceph_clock_now(store->ctx()); + qs.async_refresh_time = qs.expiration; + qs.expiration += store->ctx()->_conf->rgw_bucket_quota_ttl; + qs.async_refresh_time += store->ctx()->_conf->rgw_bucket_quota_ttl / 2; + + stats_map.add(bucket, qs); +} + +int RGWBucketStatsCache::get_bucket_stats(rgw_bucket& bucket, RGWBucketStats& stats, RGWQuotaInfo& quota) { + RGWQuotaBucketStats qs; + utime_t now = ceph_clock_now(store->ctx()); + if (stats_map.find(bucket, qs)) { + if (qs.async_refresh_time.sec() > 0 && now >= qs.async_refresh_time) { + int r = async_refresh(bucket, qs); + if (r < 0) { + ldout(store->ctx(), 0) << "ERROR: quota async refresh returned ret=" << r << dendl; + + /* continue processing, might be a transient error, async refresh is just optimization */ + } + } + + if (can_use_cached_stats(quota, qs.stats) && qs.expiration > ceph_clock_now(store->ctx())) { + stats = qs.stats; + return 0; + } + } + + int ret = fetch_bucket_totals(bucket, stats); + if (ret < 0 && ret != -ENOENT) + return ret; + + set_stats(bucket, qs, stats); + + return 0; +} + + +class RGWBucketStatsUpdate : public lru_map<rgw_bucket, RGWQuotaBucketStats>::UpdateContext { + int objs_delta; + 
uint64_t added_bytes; + uint64_t removed_bytes; +public: + RGWBucketStatsUpdate(int _objs_delta, uint64_t _added_bytes, uint64_t _removed_bytes) : + objs_delta(_objs_delta), added_bytes(_added_bytes), removed_bytes(_removed_bytes) {} + bool update(RGWQuotaBucketStats *entry) { + uint64_t rounded_kb_added = rgw_rounded_kb(added_bytes); + uint64_t rounded_kb_removed = rgw_rounded_kb(removed_bytes); + + entry->stats.num_kb_rounded += (rounded_kb_added - rounded_kb_removed); + entry->stats.num_kb += (added_bytes - removed_bytes) / 1024; + entry->stats.num_objects += objs_delta; + + return true; + } +}; + + +void RGWBucketStatsCache::adjust_bucket_stats(rgw_bucket& bucket, int objs_delta, uint64_t added_bytes, uint64_t removed_bytes) +{ + RGWBucketStatsUpdate update(objs_delta, added_bytes, removed_bytes); + stats_map.find_and_update(bucket, NULL, &update); +} + + +class RGWQuotaHandlerImpl : public RGWQuotaHandler { + RGWRados *store; + RGWBucketStatsCache stats_cache; +public: + RGWQuotaHandlerImpl(RGWRados *_store) : store(_store), stats_cache(_store) {} + virtual int check_quota(rgw_bucket& bucket, RGWQuotaInfo& bucket_quota, + uint64_t num_objs, uint64_t size) { + uint64_t size_kb = rgw_rounded_kb(size); + if (!bucket_quota.enabled) { + return 0; + } + + RGWBucketStats stats; + + int ret = stats_cache.get_bucket_stats(bucket, stats, bucket_quota); + if (ret < 0) + return ret; + + ldout(store->ctx(), 20) << "bucket quota: max_objects=" << bucket_quota.max_objects + << " max_size_kb=" << bucket_quota.max_size_kb << dendl; + + if (bucket_quota.max_objects >= 0 && + stats.num_objects + num_objs > (uint64_t)bucket_quota.max_objects) { + ldout(store->ctx(), 10) << "quota exceeded: stats.num_objects=" << stats.num_objects + << " bucket_quota.max_objects=" << bucket_quota.max_objects << dendl; + + return -ERR_QUOTA_EXCEEDED; + } + if (bucket_quota.max_size_kb >= 0 && + stats.num_kb_rounded + size_kb > (uint64_t)bucket_quota.max_size_kb) { + ldout(store->ctx(), 10) << "quota exceeded: stats.num_kb_rounded=" << stats.num_kb_rounded << " size_kb=" << size_kb + << " bucket_quota.max_size_kb=" << bucket_quota.max_size_kb << dendl; + return -ERR_QUOTA_EXCEEDED; + } + + return 0; + } + + virtual void update_stats(rgw_bucket& bucket, int obj_delta, uint64_t added_bytes, uint64_t removed_bytes) { + stats_cache.adjust_bucket_stats(bucket, obj_delta, added_bytes, removed_bytes); + }; +}; + + +RGWQuotaHandler *RGWQuotaHandler::generate_handler(RGWRados *store) +{ + return new RGWQuotaHandlerImpl(store); +}; + +void RGWQuotaHandler::free_handler(RGWQuotaHandler *handler) +{ + delete handler; +} diff --git a/src/rgw/rgw_quota.h b/src/rgw/rgw_quota.h new file mode 100644 index 00000000000..2f8f28e85a2 --- /dev/null +++ b/src/rgw/rgw_quota.h @@ -0,0 +1,74 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2013 Inktank, Inc + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. 
+ * + */ + +#ifndef CEPH_RGW_QUOTA_H +#define CEPH_RGW_QUOTA_H + + +#include "include/utime.h" +#include "include/atomic.h" +#include "common/lru_map.h" + +class RGWRados; +class JSONObj; + +struct RGWQuotaInfo { + int64_t max_size_kb; + int64_t max_objects; + bool enabled; + int64_t max_size_soft_threshold; + int64_t max_objs_soft_threshold; + + RGWQuotaInfo() : max_size_kb(-1), max_objects(-1), enabled(false), + max_size_soft_threshold(-1), max_objs_soft_threshold(-1) {} + + void encode(bufferlist& bl) const { + ENCODE_START(1, 1, bl); + ::encode(max_size_kb, bl); + ::encode(max_objects, bl); + ::encode(enabled, bl); + ENCODE_FINISH(bl); + } + void decode(bufferlist::iterator& bl) { + DECODE_START(1, bl); + ::decode(max_size_kb, bl); + ::decode(max_objects, bl); + ::decode(enabled, bl); + DECODE_FINISH(bl); + } + + void dump(Formatter *f) const; + + void decode_json(JSONObj *obj); + +}; +WRITE_CLASS_ENCODER(RGWQuotaInfo) + +class rgw_bucket; + +class RGWQuotaHandler { +public: + RGWQuotaHandler() {} + virtual ~RGWQuotaHandler() { + } + virtual int check_quota(rgw_bucket& bucket, RGWQuotaInfo& bucket_quota, + uint64_t num_objs, uint64_t size) = 0; + + virtual void update_stats(rgw_bucket& bucket, int obj_delta, uint64_t added_bytes, uint64_t removed_bytes) = 0; + + static RGWQuotaHandler *generate_handler(RGWRados *store); + static void free_handler(RGWQuotaHandler *handler); +}; + +#endif diff --git a/src/rgw/rgw_rados.cc b/src/rgw/rgw_rados.cc index 8b4d18f4e68..9f0a900f3d3 100644 --- a/src/rgw/rgw_rados.cc +++ b/src/rgw/rgw_rados.cc @@ -357,16 +357,20 @@ int RGWZoneParams::store_info(CephContext *cct, RGWRados *store, RGWRegion& regi } void RGWRegionMap::encode(bufferlist& bl) const { - ENCODE_START(1, 1, bl); + ENCODE_START(2, 1, bl); ::encode(regions, bl); ::encode(master_region, bl); + ::encode(bucket_quota, bl); ENCODE_FINISH(bl); } void RGWRegionMap::decode(bufferlist::iterator& bl) { - DECODE_START(1, bl); + DECODE_START(2, bl); ::decode(regions, bl); ::decode(master_region, bl); + + if (struct_v >= 2) + ::decode(bucket_quota, bl); DECODE_FINISH(bl); regions_by_api.clear(); @@ -851,6 +855,7 @@ void RGWRados::finalize() RGWRESTConn *conn = iter->second; delete conn; } + RGWQuotaHandler::free_handler(quota_handler); } /** @@ -962,6 +967,8 @@ int RGWRados::init_complete() if (use_gc_thread) gc->start_processor(); + quota_handler = RGWQuotaHandler::generate_handler(this); + return ret; } @@ -2342,6 +2349,11 @@ int RGWRados::put_obj_meta_impl(void *ctx, rgw_obj& obj, uint64_t size, *mtime = set_mtime; } + if (state) { + /* update quota cache */ + quota_handler->update_stats(bucket, (state->exists ? 
0 : 1), size, state->size); + } + return 0; done_cancel: @@ -3211,6 +3223,11 @@ int RGWRados::delete_obj_impl(void *ctx, rgw_obj& obj, RGWObjVersionTracker *obj if (ret_not_existed) return -ENOENT; + if (state) { + /* update quota cache */ + quota_handler->update_stats(bucket, -1, 0, state->size); + } + return 0; } @@ -4598,6 +4615,38 @@ int RGWRados::get_bucket_stats(rgw_bucket& bucket, uint64_t *bucket_ver, uint64_ return 0; } +class RGWGetBucketStatsContext : public RGWGetDirHeader_CB { + RGWGetBucketStats_CB *cb; + +public: + RGWGetBucketStatsContext(RGWGetBucketStats_CB *_cb) : cb(_cb) {} + void handle_response(int r, rgw_bucket_dir_header& header) { + map<RGWObjCategory, RGWBucketStats> stats; + + if (r >= 0) { + translate_raw_stats(header, stats); + cb->set_response(header.ver, header.master_ver, &stats, header.max_marker); + } + + cb->handle_response(r); + + cb->put(); + } +}; + +int RGWRados::get_bucket_stats_async(rgw_bucket& bucket, RGWGetBucketStats_CB *ctx) +{ + RGWGetBucketStatsContext *get_ctx = new RGWGetBucketStatsContext(ctx); + int r = cls_bucket_head_async(bucket, get_ctx); + if (r < 0) { + ctx->put(); + delete get_ctx; + return r; + } + + return 0; +} + void RGWRados::get_bucket_instance_entry(rgw_bucket& bucket, string& entry) { entry = bucket.name + ":" + bucket.bucket_id; @@ -5480,6 +5529,25 @@ int RGWRados::cls_bucket_head(rgw_bucket& bucket, struct rgw_bucket_dir_header& return 0; } +int RGWRados::cls_bucket_head_async(rgw_bucket& bucket, RGWGetDirHeader_CB *ctx) +{ + librados::IoCtx index_ctx; + string oid; + int r = open_bucket_index(bucket, index_ctx, oid); + if (r < 0) + return r; + + r = cls_rgw_get_dir_header_async(index_ctx, oid, ctx); + if (r < 0) + return r; + + return 0; +} + +int RGWRados::check_quota(rgw_bucket& bucket, RGWQuotaInfo& quota_info, uint64_t obj_size) +{ + return quota_handler->check_quota(bucket, quota_info, 1, obj_size); +} class IntentLogNameFilter : public RGWAccessListFilter { diff --git a/src/rgw/rgw_rados.h b/src/rgw/rgw_rados.h index 65765c414aa..52b898123d4 100644 --- a/src/rgw/rgw_rados.h +++ b/src/rgw/rgw_rados.h @@ -636,6 +636,8 @@ struct RGWRegionMap { string master_region; + RGWQuotaInfo bucket_quota; + RGWRegionMap() : lock("RGWRegionMap") {} void encode(bufferlist& bl) const; @@ -759,6 +761,29 @@ public: int renew_state(); }; +class RGWGetBucketStats_CB : public RefCountedObject { +protected: + rgw_bucket bucket; + uint64_t bucket_ver; + uint64_t master_ver; + map<RGWObjCategory, RGWBucketStats> *stats; + string max_marker; +public: + RGWGetBucketStats_CB(rgw_bucket& _bucket) : bucket(_bucket), stats(NULL) {} + virtual ~RGWGetBucketStats_CB() {} + virtual void handle_response(int r) = 0; + virtual void set_response(uint64_t _bucket_ver, uint64_t _master_ver, + map<RGWObjCategory, RGWBucketStats> *_stats, + const string &_max_marker) { + bucket_ver = _bucket_ver; + master_ver = _master_ver; + stats = _stats; + max_marker = _max_marker; + } +}; + +class RGWGetDirHeader_CB; + class RGWRados { @@ -862,6 +887,8 @@ protected: string region_name; string zone_name; + RGWQuotaHandler *quota_handler; + public: RGWRados() : lock("rados_timer_lock"), timer(NULL), gc(NULL), use_gc_thread(false), @@ -870,6 +897,7 @@ public: bucket_id_lock("rados_bucket_id"), max_bucket_id(0), cct(NULL), rados(NULL), pools_initialized(false), + quota_handler(NULL), rest_master_conn(NULL), meta_mgr(NULL), data_log(NULL) {} @@ -1290,6 +1318,7 @@ public: int decode_policy(bufferlist& bl, ACLOwner *owner); int get_bucket_stats(rgw_bucket& bucket, uint64_t 
*bucket_ver, uint64_t *master_ver, map<RGWObjCategory, RGWBucketStats>& stats, string *max_marker); + int get_bucket_stats_async(rgw_bucket& bucket, RGWGetBucketStats_CB *cb); void get_bucket_instance_obj(rgw_bucket& bucket, rgw_obj& obj); void get_bucket_instance_entry(rgw_bucket& bucket, string& entry); void get_bucket_meta_oid(rgw_bucket& bucket, string& oid); @@ -1321,6 +1350,7 @@ public: map<string, RGWObjEnt>& m, bool *is_truncated, string *last_entry, bool (*force_check_filter)(const string& name) = NULL); int cls_bucket_head(rgw_bucket& bucket, struct rgw_bucket_dir_header& header); + int cls_bucket_head_async(rgw_bucket& bucket, RGWGetDirHeader_CB *ctx); int prepare_update_index(RGWObjState *state, rgw_bucket& bucket, RGWModifyOp op, rgw_obj& oid, string& tag); int complete_update_index(rgw_bucket& bucket, string& oid, string& tag, int64_t poolid, uint64_t epoch, uint64_t size, @@ -1376,6 +1406,8 @@ public: int bucket_rebuild_index(rgw_bucket& bucket); int remove_objs_from_index(rgw_bucket& bucket, list<string>& oid_list); + int check_quota(rgw_bucket& bucket, RGWQuotaInfo& quota_info, uint64_t obj_size); + string unique_id(uint64_t unique_num) { char buf[32]; snprintf(buf, sizeof(buf), ".%llu.%llu", (unsigned long long)instance_id(), (unsigned long long)unique_num); diff --git a/src/rgw/rgw_user.cc b/src/rgw/rgw_user.cc index 5e5b5c564bb..dc529e3d48d 100644 --- a/src/rgw/rgw_user.cc +++ b/src/rgw/rgw_user.cc @@ -1682,6 +1682,9 @@ int RGWUser::execute_add(RGWUserAdminOpState& op_state, std::string *err_msg) if (op_state.op_mask_specified) user_info.op_mask = op_state.get_op_mask(); + if (op_state.has_bucket_quota()) + user_info.bucket_quota = op_state.get_bucket_quota(); + // update the request op_state.set_user_info(user_info); op_state.set_populated(); @@ -1884,6 +1887,9 @@ int RGWUser::execute_modify(RGWUserAdminOpState& op_state, std::string *err_msg) if (op_state.op_mask_specified) user_info.op_mask = op_state.get_op_mask(); + if (op_state.has_bucket_quota()) + user_info.bucket_quota = op_state.get_bucket_quota(); + if (op_state.has_suspension_op()) { __u8 suspended = op_state.get_suspension_status(); user_info.suspended = suspended; diff --git a/src/rgw/rgw_user.h b/src/rgw/rgw_user.h index 32bcf199001..e71b8f81778 100644 --- a/src/rgw/rgw_user.h +++ b/src/rgw/rgw_user.h @@ -172,6 +172,10 @@ struct RGWUserAdminOpState { bool subuser_params_checked; bool user_params_checked; + bool bucket_quota_specified; + + RGWQuotaInfo bucket_quota; + void set_access_key(std::string& access_key) { if (access_key.empty()) return; @@ -285,6 +289,12 @@ struct RGWUserAdminOpState { key_op = true; } + void set_bucket_quota(RGWQuotaInfo& quota) + { + bucket_quota = quota; + bucket_quota_specified = true; + } + bool is_populated() { return populated; }; bool is_initialized() { return initialized; }; bool has_existing_user() { return existing_user; }; @@ -303,6 +313,7 @@ struct RGWUserAdminOpState { bool will_purge_keys() { return purge_keys; }; bool will_purge_data() { return purge_data; }; bool will_generate_subuser() { return gen_subuser; }; + bool has_bucket_quota() { return bucket_quota_specified; } void set_populated() { populated = true; }; void clear_populated() { populated = false; }; void set_initialized() { initialized = true; }; @@ -317,6 +328,7 @@ struct RGWUserAdminOpState { uint32_t get_subuser_perm() { return perm_mask; }; uint32_t get_max_buckets() { return max_buckets; }; uint32_t get_op_mask() { return op_mask; }; + RGWQuotaInfo& get_bucket_quota() { return bucket_quota; } 
std::string get_user_id() { return user_id; }; std::string get_subuser() { return subuser; }; @@ -403,6 +415,7 @@ struct RGWUserAdminOpState { key_params_checked = false; subuser_params_checked = false; user_params_checked = false; + bucket_quota_specified = false; } }; diff --git a/src/test/cli/radosgw-admin/help.t b/src/test/cli/radosgw-admin/help.t index 2def60107dc..4fe30b1cda7 100644 --- a/src/test/cli/radosgw-admin/help.t +++ b/src/test/cli/radosgw-admin/help.t @@ -23,6 +23,9 @@ bucket check check bucket index object rm remove object object unlink unlink object from bucket index + quota set set quota params + quota enable enable quota + quota disable disable quota region get show region info regions list list all regions set on this cluster region set set region info (requires infile) @@ -116,6 +119,12 @@ <date> := "YYYY-MM-DD[ hh:mm:ss]" + Quota options: + --bucket specified bucket for quota command + --max-objects specify max objects + --max-size specify max size (in bytes) + --quota-scope scope of quota (bucket, user) + --conf/-c FILE read configuration from the given configuration file --id/-i ID set ID portion of my name --name/-n TYPE.ID set name diff --git a/src/test/encoding/ceph_dencoder.cc b/src/test/encoding/ceph_dencoder.cc index 81abcd1de9e..dbed6f524d8 100644 --- a/src/test/encoding/ceph_dencoder.cc +++ b/src/test/encoding/ceph_dencoder.cc @@ -93,7 +93,7 @@ public: // allow 0- or 1-based (by wrapping) if (i == 0) i = m_list.size(); - if (i > m_list.size()) + if ((i == 0) || (i > m_list.size())) return "invalid id for generated object"; typename list<T*>::iterator p = m_list.begin(); for (i--; i > 0 && p != m_list.end(); ++p, --i) ; @@ -177,7 +177,7 @@ public: // allow 0- or 1-based (by wrapping) if (i == 0) i = m_list.size(); - if (i > m_list.size()) + if ((i == 0) || (i > m_list.size())) return "invalid id for generated object"; typename list<T*>::iterator p = m_list.begin(); for (i--; i > 0 && p != m_list.end(); ++p, --i) ; diff --git a/src/test/filestore/run_seed_to_range.sh b/src/test/filestore/run_seed_to_range.sh index c5b399d7aae..365b34918d2 100755 --- a/src/test/filestore/run_seed_to_range.sh +++ b/src/test/filestore/run_seed_to_range.sh @@ -12,7 +12,7 @@ mydir=`dirname $0` for f in `seq $from $to` do if ! $mydir/run_seed_to.sh $seed $f; then - if -d $dir; then + if [ -d $dir ]; then echo copying evidence to $dir cp -a . $dir else diff --git a/src/test/librados/cmd.cc b/src/test/librados/cmd.cc index 71343f2b908..f47cc9fc7d2 100644 --- a/src/test/librados/cmd.cc +++ b/src/test/librados/cmd.cc @@ -100,8 +100,9 @@ TEST(LibRadosCmd, PGCmd) { string pgid = stringify(poolid) + ".0"; cmd[0] = (char *)"asdfasdf"; - ASSERT_EQ(-22, rados_pg_command(cluster, pgid.c_str(), (const char **)cmd, 1, "", 0, &buf, &buflen, &st, &stlen)); - + // note: tolerate NXIO here in case the cluster is thrashing out underneath us. 
+ int r = rados_pg_command(cluster, pgid.c_str(), (const char **)cmd, 1, "", 0, &buf, &buflen, &st, &stlen); + ASSERT_TRUE(r == -22 || r == -ENXIO); // make sure the pg exists on the osd before we query it rados_ioctx_t io; @@ -114,7 +115,9 @@ TEST(LibRadosCmd, PGCmd) { string qstr = "{\"prefix\":\"pg\", \"cmd\":\"query\", \"pgid\":\"" + pgid + "\"}"; cmd[0] = (char *)qstr.c_str(); - ASSERT_EQ(0, rados_pg_command(cluster, pgid.c_str(), (const char **)cmd, 1, "", 0, &buf, &buflen, &st, &stlen)); + // note: tolerate ENOENT/ENXIO here if hte osd is thrashing out underneath us + r = rados_pg_command(cluster, pgid.c_str(), (const char **)cmd, 1, "", 0, &buf, &buflen, &st, &stlen); + ASSERT_TRUE(r == 0 || r == -ENOENT || r == -ENXIO); ASSERT_LT(0u, buflen); rados_buffer_free(buf); diff --git a/src/test/pybind/test_ceph_argparse.py b/src/test/pybind/test_ceph_argparse.py index 34bcf698e5a..540f690472b 100755 --- a/src/test/pybind/test_ceph_argparse.py +++ b/src/test/pybind/test_ceph_argparse.py @@ -460,10 +460,12 @@ class TestMDS(TestArgparse): 'toomany'])) def test_add_data_pool(self): - self.check_1_natural_arg('mds', 'add_data_pool') + self.assert_valid_command(['mds', 'add_data_pool', '1']) + self.assert_valid_command(['mds', 'add_data_pool', 'foo']) def test_remove_data_pool(self): - self.check_1_natural_arg('mds', 'remove_data_pool') + self.assert_valid_command(['mds', 'remove_data_pool', '1']) + self.assert_valid_command(['mds', 'remove_data_pool', 'foo']) def test_newfs(self): self.assert_valid_command(['mds', 'newfs', '1', '2', @@ -831,7 +833,7 @@ class TestOSD(TestArgparse): uuid, 'toomany'])) - def test_blackist(self): + def test_blacklist(self): for action in ('add', 'rm'): self.assert_valid_command(['osd', 'blacklist', action, '1.2.3.4/567']) @@ -941,22 +943,17 @@ class TestOSD(TestArgparse): def test_pool_set(self): for var in ('size', 'min_size', 'crash_replay_interval', - 'pg_num', 'pgp_num', 'crush_ruleset'): + 'pg_num', 'pgp_num', 'crush_ruleset', + 'hashpspool'): self.assert_valid_command(['osd', 'pool', - 'set', 'poolname', var, '-1']) + 'set', 'poolname', var, 'value']) assert_equal({}, validate_command(sigdict, ['osd', 'pool', 'set'])) assert_equal({}, validate_command(sigdict, ['osd', 'pool', 'set', 'poolname'])) assert_equal({}, validate_command(sigdict, ['osd', 'pool', 'set', 'poolname', - 'size', 'invalid'])) - assert_equal({}, validate_command(sigdict, ['osd', 'pool', - 'set', 'poolname', - 'invalid', '-1'])) - assert_equal({}, validate_command(sigdict, ['osd', 'pool', - 'set', 'poolname', - 'size', '-1', + 'size', 'value', 'toomany'])) def test_pool_set_quota(self): diff --git a/src/vstart.sh b/src/vstart.sh index def480779de..4839cc1156d 100755 --- a/src/vstart.sh +++ b/src/vstart.sh @@ -237,6 +237,7 @@ fi $SUDO rm -f core* test -d out || mkdir out +test -d dev || mkdir dev $SUDO rm -rf out/* test -d gmon && $SUDO rm -rf gmon/* @@ -390,7 +391,7 @@ EOF cmd="rm -rf $CEPH_DEV_DIR/mon.$f" echo $cmd $cmd - cmd="mkdir $CEPH_DEV_DIR/mon.$f" + cmd="mkdir -p $CEPH_DEV_DIR/mon.$f" echo $cmd $cmd cmd="$CEPH_BIN/ceph-mon --mkfs -c $conf -i $f --monmap=$monmap_fn" |