diff options
121 files changed, 2637 insertions, 1252 deletions
@@ -15,11 +15,6 @@ Copyright: Copyright (C) 2004-2006 Sage Weil <sage@newdream.net> License: GPL2 -Files: src/common/fiemap.cc -Copyright: - Copyright (C) 2010 Canonical -License: GPL2 - Files: src/mount/canonicalize.c Copyright: Copyright (C) 1993 Rick Sladkey <jrs@world.std.com> License: LGPL2 or later diff --git a/PendingReleaseNotes b/PendingReleaseNotes index ea06b760ba3..4b6cb800290 100644 --- a/PendingReleaseNotes +++ b/PendingReleaseNotes @@ -1,3 +1,15 @@ v0.67 ~~~~~ +* ceph-osd now requires a max fd limit of at least + filestore_wbthrottle_(xfs|btrfs)_inodes_hard_limit (5000 by default) + in order to accomodate the new write back throttle system. upstart + now sets the fd limit to 32k. sysvinit will set it to 32k by default + (still overrideable via max_open_files). + +* The 'ceph pg <pgid> ...' commands (like 'ceph pg <pgid> query') are + deprecated in favor of 'ceph tell <pgid> ...'. This makes the + distinction between 'ceph pg <command> <pgid>' and 'ceph pg <pgid> + <command>' less awkward by making it clearer that the 'tell' + commands are talking to the OSD serving the placement group, not the + monitor. diff --git a/ceph.spec.in b/ceph.spec.in index 4365eb55eb0..696d9ad3332 100644 --- a/ceph.spec.in +++ b/ceph.spec.in @@ -146,7 +146,9 @@ managers such as Pacemaker. Summary: RADOS distributed object store client library Group: System Environment/Libraries License: LGPL-2.0 +%if 0%{?rhel_version} || 0%{?centos_version} || 0%{?fedora} Obsoletes: ceph-libs +%endif %description -n librados2 RADOS is a reliable, autonomic distributed object storage cluster developed as part of the Ceph distributed storage system. This is a @@ -157,7 +159,9 @@ store using a simple file-like interface. Summary: RADOS block device client library Group: System Environment/Libraries License: LGPL-2.0 +%if 0%{?rhel_version} || 0%{?centos_version} || 0%{?fedora} Obsoletes: ceph-libs +%endif %description -n librbd1 RBD is a block device striped across multiple distributed objects in RADOS, a reliable, autonomic distributed object storage cluster @@ -168,7 +172,9 @@ shared library allowing applications to manage these block devices. Summary: Ceph distributed file system client library Group: System Environment/Libraries License: LGPL-2.0 +%if 0%{?rhel_version} || 0%{?centos_version} || 0%{?fedora} Obsoletes: ceph-libs +%endif %description -n libcephfs1 Ceph is a distributed network file system designed to provide excellent performance, reliability, and scalability. This is a shared library @@ -182,6 +188,8 @@ License: LGPL-2.0 Requires: librados2 = %{version}-%{release} Requires: librbd1 = %{version}-%{release} Requires: libcephfs1 = %{version}-%{release} +Requires: python-flask +Requires: python-requests %if 0%{defined suse_version} %py_requires %endif diff --git a/configure.ac b/configure.ac index 415da311712..812126664eb 100644 --- a/configure.ac +++ b/configure.ac @@ -8,7 +8,7 @@ AC_PREREQ(2.59) # VERSION define is not used by the code. It gets a version string # from 'git describe'; see src/ceph_ver.[ch] -AC_INIT([ceph], [0.67-rc1], [ceph-devel@vger.kernel.org]) +AC_INIT([ceph], [0.67], [ceph-devel@vger.kernel.org]) # Create release string. Used with VERSION for RPMs. RPM_RELEASE=0 diff --git a/debian/changelog b/debian/changelog index cfcf0491dd8..7b814f0da90 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +ceph (0.67-rc2-1) precise; urgency=low + + * New upstream release + + -- Gary Lowell <gary.lowell@inktank.com> Wed, 24 Jul 2013 16:18:33 -0700 + ceph (0.67-rc1-1) precise; urgency=low * New upstream release diff --git a/debian/control b/debian/control index 241fabb5e4f..195cb37fe62 100644 --- a/debian/control +++ b/debian/control @@ -100,7 +100,7 @@ Description: debugging symbols for ceph-mds Package: ceph-fuse Architecture: linux-any Depends: ${misc:Depends}, ${shlibs:Depends} -Recommends: fuse | fuse-utils +Recommends: fuse Description: FUSE-based client for the Ceph distributed file system Ceph is a distributed network file system designed to provide excellent performance, reliability, and scalability. This is a @@ -129,7 +129,7 @@ Description: debugging symbols for ceph-fuse Package: rbd-fuse Architecture: linux-any Depends: ${misc:Depends}, ${shlibs:Depends} -Recommends: fuse | fuse-utils +Recommends: fuse Description: FUSE-based rbd client for the Ceph distributed file system Ceph is a distributed network file system designed to provide excellent performance, reliability, and scalability. This is a @@ -389,7 +389,7 @@ Description: Ceph test and benchmarking tools. Package: python-ceph Architecture: linux-any Section: python -Depends: librados2, librbd1, ${misc:Depends}, ${python:Depends} +Depends: librados2, librbd1, python-flask, ${misc:Depends}, ${python:Depends}, python-requests X-Python-Version: >= 2.6 Description: Python libraries for the Ceph distributed filesystem Ceph is a distributed storage and network file system designed to provide diff --git a/debian/copyright b/debian/copyright index 4295a1564f9..e94a11b9962 100644 --- a/debian/copyright +++ b/debian/copyright @@ -16,11 +16,6 @@ Copyright: Copyright (C) 2004-2006 Sage Weil <sage@newdream.net> License: GPL2 -Files: src/common/fiemap.cc -Copyright: - Copyright (C) 2010 Canonical -License: GPL2 - Files: src/mount/canonicalize.c Copyright: Copyright (C) 1993 Rick Sladkey <jrs@world.std.com> License: LGPL2 or later diff --git a/doc/changelog/v0.61.6.txt b/doc/changelog/v0.61.6.txt new file mode 100644 index 00000000000..3a1e5bd4be9 --- /dev/null +++ b/doc/changelog/v0.61.6.txt @@ -0,0 +1,75 @@ +commit 59ddece17e36fef69ecf40e239aeffad33c9db35 +Author: Gary Lowell <gary.lowell@inktank.com> +Date: Tue Jul 23 13:52:19 2013 -0700 + + v0.61.6 + +commit 38c3271d3fc415919f0856398bd94eb87a0776b5 +Author: Sage Weil <sage@inktank.com> +Date: Tue Jul 23 13:32:12 2013 -0700 + + mon/OSDMonitor: fix base case for 7fb3804fb workaround + + After cluster creation, we have no full map stored and first_committed == + 1. In that case, there is no need for a full map, since we can get there + from OSDMap() and the incrementals. + + Backport: cuttlefish + Signed-off-by: Sage Weil <sage@inktank.com> + Reviewed-by: Joao Eduardo Luis <joao@inktank.com> + (cherry picked from commit e807770784175b05130bba938113fdbf874f152e) + +commit f94956cb1a56ff62e01b7ae218a93c4004470ae5 +Author: Joao Eduardo Luis <joao.luis@inktank.com> +Date: Tue Jul 23 17:25:13 2013 +0100 + + mon: OSDMonitor: work around a full version bug introduced in 7fb3804fb + + In 7fb3804fb860dcd0340dd3f7c39eec4315f8e4b6 we moved the full version + stashing logic to the encode_trim_extra() function. However, we forgot + to update the osdmap's 'latest_full' key that should always point to + the latest osdmap full version. This eventually degenerated in a missing + full version after a trim. This patch works around this bug by looking + for the latest available full osdmap version in the store and updating + 'latest_full' to its proper value. + + Related-to: #5704 + Backport: cuttlefish + + Signed-off-by: Joao Eduardo Luis <joao.luis@inktank.com> + Reviewed-by: Sage Weil <sage@inktank.com> + (cherry picked from commit 97462a3213e5e15812c79afc0f54d697b6c498b1) + +commit 10e1de45dc8ace793ecf921f884f90c9daa99c48 +Author: Joao Eduardo Luis <joao.luis@inktank.com> +Date: Tue Jul 23 16:36:52 2013 +0100 + + mon: OSDMonitor: update the osdmap's latest_full with the new full version + + We used to do this on encode_full(), but since [1] we no longer rely on + PaxosService to manage the full maps for us. And we forgot to write down + the latest_full version to the store, leaving it in a truly outdated state. + + [1] - 7fb3804fb860dcd0340dd3f7c39eec4315f8e4b6 + + Fixes: #5704 + Backport: cuttlefish + Signed-off-by: Joao Eduardo Luis <joao.luis@inktank.com> + Reviewed-by: Sage Weil <sage@inktank.com> + (cherry picked from commit a815547ed3e5ffdbbb96c8c0c1b8d6dd8c62bfba) + +commit a0cb40b45c4f2f921a63c2d7bb5a28572381d793 +Author: Sage Weil <sage@inktank.com> +Date: Thu Jul 18 14:35:19 2013 -0700 + + mon: decline to scrub when paxos is not active + + In f1ce8d7c955a2443111bf7d9e16b4c563d445712 we close a race between scrub + and paxos commit completion on the leader. The fix is nontrivial to + backport and probably not worthwhile; just avoid scrubbing at that time + for now. + + Note that the actual fix for this is in commit + f1ce8d7c955a2443111bf7d9e16b4c563d445712. + + Signed-off-by: Sage Weil <sage@inktank.com> diff --git a/doc/changelog/v0.61.7.txt b/doc/changelog/v0.61.7.txt new file mode 100644 index 00000000000..5836c6a32d3 --- /dev/null +++ b/doc/changelog/v0.61.7.txt @@ -0,0 +1,220 @@ +commit 8f010aff684e820ecc837c25ac77c7a05d7191ff +Author: Gary Lowell <gary.lowell@inktank.com> +Date: Wed Jul 24 20:44:12 2013 -0700 + + v0.61.7 + +commit 24a56a9637afd8c64b71d264359c78a25d52be02 +Author: Sage Weil <sage@inktank.com> +Date: Wed Jul 24 14:46:24 2013 -0700 + + ceph-disk: use new get_dev_path helper for list + + Backport: cuttlefish + Signed-off-by: Sage Weil <sage@inktank.com> + Reviewed-by: Dan Mick <dan.mick@inktank.com> + Tested-by: Olivier Bonvalet <ob.ceph@daevel.fr> + (cherry picked from commit fd1fd664d6102a2a96b27e8ca9933b54ac626ecb) + +commit 1f8e4b15eeb132fd7f389318009b19f8f13adbf5 +Author: Sage Weil <sage@inktank.com> +Date: Thu Jul 11 12:59:56 2013 -0700 + + ceph-disk: use /sys/block to determine partition device names + + Not all devices are basename + number; some have intervening character(s), + like /dev/cciss/c0d1p2. + + Signed-off-by: Sage Weil <sage@inktank.com> + (cherry picked from commit 2ea8fac441141d64ee0d26c5dd2b441f9782d840) + +commit 0a08c8226cb3e461301beade9bab2e264d1b960e +Author: Sage Weil <sage@inktank.com> +Date: Wed Jul 3 11:01:58 2013 -0700 + + ceph-disk: reimplement is_partition() using /sys/block + + Signed-off-by: Sage Weil <sage@inktank.com> + (cherry picked from commit 5b031e100b40f597752b4917cdbeebb366eb98d7) + +commit 056000346db09ea7274a22e57cf4b86a7ea4090e +Author: Sage Weil <sage@inktank.com> +Date: Wed Jul 3 11:01:39 2013 -0700 + + ceph-disk: use get_dev_name() helper throughout + + This is more robust than the broken split trick. + + Signed-off-by: Sage Weil <sage@inktank.com> + (cherry picked from commit 3359aaedde838c98d1155611e157fd2da9e8b9f5) + +commit f3ee3e690c42769229a6cd9ae8dec43f2aa22ecd +Author: Sage Weil <sage@inktank.com> +Date: Wed Jul 3 10:55:36 2013 -0700 + + ceph-disk: refactor list_[all_]partitions + + Make these methods work in terms of device *names*, not paths, and fix up + the only direct list_partitions() caller to do the same. + + Signed-off-by: Sage Weil <sage@inktank.com> + (cherry picked from commit 35d3f2d84808efda3d2ac868afe03e6959d51c03) + +commit be12811b4cb98ff1c2c691c67af7ad3586c436ff +Author: Sage Weil <sage@inktank.com> +Date: Wed Jul 3 10:52:29 2013 -0700 + + ceph-disk: add get_dev_name, path helpers + + Signed-off-by: Sage Weil <sage@inktank.com> + (cherry picked from commit e0401591e352ea9653e3276d66aebeb41801eeb3) + +commit f46dbc462f623e9ab6c00394abb4d890e5d90890 +Author: Sage Weil <sage@inktank.com> +Date: Tue Jun 18 16:21:48 2013 -0700 + + ceph-disk: handle /dev/foo/bar devices throughout + + Assume the last component is the unique device name, even if it appears + under a subdir of /dev. + + Signed-off-by: Sage Weil <sage@inktank.com> + (cherry picked from commit cb97338b1186939deecb78e9d949c38c3ef59026) + +commit f799dac7bdf7cf0824a177131473cf59ef3c5205 +Author: Sage Weil <sage@inktank.com> +Date: Mon Jun 17 20:54:15 2013 -0700 + + ceph-disk: make is_held() smarter about full disks + + Handle the case where the device is a full disk. Make the partition + check a bit more robust (don't make assumptions about naming aside from + the device being a prefix of the partition). + + Signed-off-by: Sage Weil <sage@inktank.com> + (cherry picked from commit e082f1247fb6ddfb36c4223cbfdf500d6b45c978) + +commit 27f31895664fa7f10c1617d486f2a6ece0f97091 +Author: Sage Weil <sage@inktank.com> +Date: Wed Jul 24 11:55:42 2013 -0700 + + mon/OSDMonitor: search for latest full osdmap if record version is missing + + In 97462a3213e5e15812c79afc0f54d697b6c498b1 we tried to search for a + recent full osdmap but were looking at the wrong key. If full_0 was + present we could record that the latest full map was last_committed even + though it wasn't present. This is fixed in 76cd7ac1c, but we need to + compensate for when get_version_latest_full() gives us a back version + number by repeating the search. + + Fixes: #5737 + Signed-off-by: Sage Weil <sage@inktank.com> + Reviewed-by: Joao Eduardo Luis <joao.luis@inktank.com> + (cherry picked from commit c2131d4047156aa2964581c9dbd93846382a07e7) + +commit 5b0967f03efb1be210b52f24f095f023fe1bc539 +Author: Joao Eduardo Luis <joao.luis@inktank.com> +Date: Mon Jun 17 14:43:36 2013 +0100 + + test: test_store_tool: global init before using LevelDBStore + + Fixes a segfault + + Signed-off-by: Joao Eduardo Luis <joao.luis@inktank.com> + Reviewed-by: Sage Weil <sage@inktank.com> + (cherry picked from commit a7a7d3fc8a2ba4a30ef136a32f2903d157b3e19a) + +commit 115468c73f121653eec2efc030d5ba998d834e43 +Author: Joao Eduardo Luis <joao.luis@inktank.com> +Date: Wed Jul 24 12:00:28 2013 +0100 + + mon: OSDMonitor: fix a bug introduced on 97462a32 + + Fixes: #5737 + Backport: cuttlefish + + Signed-off-by: Joao Eduardo Luis <joao.luis@inktank.com> + Reviewed-by: Sage Weil <sage@inktank.com> + (cherry picked from commit 76cd7ac1c2094b34ad36bea89b2246fa90eb2f6d) + +commit 938a639e2cb6abd22c2c588e619c1aae32c6521f +Author: Sage Weil <sage@inktank.com> +Date: Sun Jul 21 08:48:18 2013 -0700 + + mon/Paxos: fix pn for uncommitted value during collect/last phase + + During the collect/last exchange, peers share any uncommitted values + with the leader. They are supposed to also share the pn under which + that value was accepted, but were instead using the just-accepted pn + value. This effectively meant that we *always* took the uncommitted + value; if there were multiples, which one we accepted depended on what + order the LAST messages arrived, not which pn the values were generated + under. + + The specific failure sequence I observed: + + - collect + - learned uncommitted value for 262 from myself + - send collect with pn 901 + - got last with pn 901 (incorrect) for 200 (old) from peer + - discard our own value, remember the other + - finish collect phase + - ignore old uncommitted value + + Fix this by storing a pending_v and pending_pn value whenever we accept + a value. Use this to send an appropriate pn value in the LAST reply + so that the leader can make it's decision about which uncommitted value + to accept based on accurate information. Also use it when we learn + the uncommitted value from ourselves. + + We could probably be more clever about storing less information here, + for example by omitting pending_v and clearing pending_pn at the + appropriate point, but that would be more fragile. Similarly, we could + store a pn for *every* commit if we wanted to lay some groundwork for + having multiple uncommitted proposals in flight, but I don't want to + speculate about what is necessary or sufficient for a correct solution + there. + + Fixes: #5698 + Backport: cuttlefish, bobtail + Signed-off-by: Sage Weil <sage@inktank.com> + (cherry picked from commit 20baf662112dd5f560bc3a2d2114b469444c3de8) + +commit 18596340f020be1f21bdc9bcc752ae1da4a93a46 +Author: Sage Weil <sage@inktank.com> +Date: Sun Jul 21 08:12:46 2013 -0700 + + mon/Paxos: debug ignored uncommitted values + + Signed-off-by: Sage Weil <sage@inktank.com> + (cherry picked from commit 19b29788966eb80ed847630090a16a3d1b810969) + +commit f598245f1355d7791162c03d90bdd97b013e56f3 +Author: Sage Weil <sage@inktank.com> +Date: Sun Jul 21 08:11:22 2013 -0700 + + mon/Paxos: only learn uncommitted value if it is in the future + + If an older peer sends an uncommitted value, make sure we only take it + if it is in the future, and at least as new as any current uncommitted + value. + + (Prior to the previous patch, peers could send values from long-past + rounds. The pn values are also bogus.) + + Signed-off-by: Sage Weil <sage@inktank.com> + (cherry picked from commit b3253a453c057914753846c77499f98d3845c58e) + +commit 732286a28cd8a643593d490a7a84a590d372f78d +Author: Sage Weil <sage@inktank.com> +Date: Mon Jul 22 14:13:23 2013 -0700 + + mon/Paxos: only share uncommitted value if it is next + + We may have an uncommitted value from our perspective (it is our lc + 1) + when the collector has a much larger lc (because we have been out for + the last few rounds). Only share an uncommitted value if it is in fact + the next value. + + Signed-off-by: Sage Weil <sage@inktank.com> + (cherry picked from commit b26b7f6e5e02ac6beb66e3e34e177e6448cf91cf) diff --git a/doc/dev/repo-lab-access.rst b/doc/dev/repo-lab-access.rst new file mode 100644 index 00000000000..706f02e395c --- /dev/null +++ b/doc/dev/repo-lab-access.rst @@ -0,0 +1,88 @@ +Notes on Ceph repositories and test lab +======================================= + +Special branches +---------------- + +* ``master'': current tip (integration branch) +* ``next'': pending release (feature frozen, bugfixes only) +* ``last'': last/previous release +* ``dumpling'', ``cuttlefish'', ``bobtail'', ``argonaut'', etc.: stable release branches +* ``dumpling-next'': backports for stable release, pending testing + +Rules +----- + +The source repos are all on github. + +* Any branch pushed to ceph.git will kick off builds that will either + run unit tests or generate packages for gitbuilder.ceph.com. Try + not to generate unnecessary load. For private, unreviewed work, + only push to branches named ``wip-*''. This avoids colliding with + any special branches. + +* Nothing should every reach a special branch unless it has been + reviewed. + +* Preferred means of review is via github pull requests to capture any + review discussion. + +* For multi-patch series, the pull request can be merged via github, + and a Reviewed-by: ... line added to the merge commit. + +* For single- (or few-) patch merges, it is preferable to add the + Reviewed-by: directly to the commit so that it is also visible when + the patch is cherry-picked for backports. + +* All backports should use ``git cherry-pick -x'' to capture which + commit they are cherry-picking from. + + +Teuthology lab hardware +----------------------- + +* 96 plana: 8 core, 4 disk, 1gig and 10gig networks. Used for nightly runs. + +* 64 burnupi: Dell R515s: 6 core, 8 disk, 1gig and 10gig networks, moderate CPU. crummy SAS expanders. + +* 120 mira: 8 core, 8 disks, 1gig network. older hardware, flakier disks + +* 8 vercoi: 24 core, RAID; VM hosts + +* 4 senta: 25 core: VM hosts + +* 36 saya: Calxeda ARM7 nodes (for testing) + +* 24 tala: Calxeda ARM7 nodes (gitbuilder) + +* ~200 vps: VMs running on mira hardware + +Locking machines +---------------- + +* All tests pull their builds from gitbuilder.ceph.com. + +* Anybody can lock machines with ``teuthology-lock --lock-many NUM + --machine-type TYPE''. + +* Machines are locked as ``whoami''@``hostname -s''. --owner to + choose otherwise. + +* Automated tests current run on the ``plana''; please avoid locking + these for personal use. + +* To unlock, please use ``teuthology-nuke -t list.yaml -r -u'', which + will reboot and clean up any leftover test state before unlocking + (or fail to unlock). It looks for a ``targets::'' section in the + yaml, so the regular job yaml will work. You can get a list of all + locked machines with ``teuthology-lock --list-targets''. + +* ``teuthology-lock -a --brief'' or ``teuthology-lock --summary'' to + see what is locked and by whom. + +* Be conscientious about scheduling entire qa runs. Coordinate + utilization on IRC. Make sure you are running the latest version + ceph-qa-suite.git and teuthology.git. + +* Results for scheduled runs appear in /a/$jobname on the teuthology + machine. ``ls -alt | head'' to find them. diff --git a/doc/man/8/ceph-rest-api.rst b/doc/man/8/ceph-rest-api.rst index 8a87f97ce19..a000d09c676 100644 --- a/doc/man/8/ceph-rest-api.rst +++ b/doc/man/8/ceph-rest-api.rst @@ -7,7 +7,7 @@ Synopsis ======== -| **ceph-rest-api** [ -c *conffile* ] [ -n *name* ... ] +| **ceph-rest-api** [ -c *conffile* ] [--cluster *clustername* ] [ -n *name* ] [-i *id* ] Description @@ -21,7 +21,7 @@ command-line tool through an HTTP-accessible interface. Options ======= -.. option:: -c/--conf *conffile* +.. option:: -c/--conf conffile names the ceph.conf file to use for configuration. If -c is not specified, the default depends on the state of the --cluster option @@ -35,21 +35,26 @@ Options so you can also pass this option in the environment as CEPH_CONF. -.. option:: --cluster *clustername* +.. option:: --cluster clustername set *clustername* for use in the $cluster metavariable, for locating the ceph.conf file. The default is 'ceph'. - You can also pass this option in the environment as - CEPH_CLUSTER_NAME. -.. option:: -n/--name *name* +.. option:: -n/--name name specifies the client 'name', which is used to find the client-specific configuration options in the config file, and also is the name used for authentication when connecting to the cluster (the entity name appearing in ceph auth list output, - for example). The default is 'client.restapi'. You can also - pass this option in the environment as CEPH_NAME. + for example). The default is 'client.restapi'. + +.. option:: -i/--id id + + specifies the client 'id', which will form the clientname + as 'client.<id>' if clientname is not set. If -n/-name is + set, that takes precedence. + + Also, global Ceph options are supported. Configuration parameters @@ -57,16 +62,22 @@ Configuration parameters Supported configuration parameters include: -* **restapi client name** the 'clientname' used for auth and ceph.conf -* **restapi keyring** the keyring file holding the key for 'clientname' -* **restapi public addr** ip:port to listen on (default 0.0.0.0:5000) +* **keyring** the keyring file holding the key for 'clientname' +* **public addr** ip:port to listen on (default 0.0.0.0:5000) +* **log file** (usual Ceph default) * **restapi base url** the base URL to answer requests on (default /api/v0.1) -* **restapi log level** critical, error, warning, info, debug -* **restapi log file** (default /var/local/ceph/<clientname>.log) +* **restapi log level** critical, error, warning, info, debug (default warning) + +Configuration parameters are searched in the standard order: +first in the section named '<clientname>', then 'client', then 'global'. -A server will run on **restapi public addr** if the ceph-rest-api -executed directly; otherwise, configuration is specified by the -enclosing WSGI web server. +<clientname> is either supplied by -n/--name, "client.<id>" where +<id> is supplied by -i/--id, or 'client.restapi' if neither option +is present. + +A single-threaded server will run on **public addr** if the ceph-rest-api +executed directly; otherwise, configuration is specified by the enclosing +WSGI web server. Commands ======== @@ -92,7 +103,9 @@ with a small description of each command, is provided when the requested path is incomplete/partially matching. Requesting / will redirect to the value of **restapi base url**, and that path will give a full list of all known commands. The command set is very similar to the commands -supported by the **ceph** tool. +supported by the **ceph** tool. One notable exception is that the +``ceph pg <pgid> <command>`` style of commands is supported here +as ``tell/<pgid>/command?args``. Deployment as WSGI application ============================== @@ -101,18 +114,22 @@ When deploying as WSGI application (say, with Apache/mod_wsgi, or nginx/uwsgi, or gunicorn, etc.), use the ``ceph_rest_api.py`` module (``ceph-rest-api`` is a thin layer around this module). The standalone web server is of course not used, so address/port configuration is done in -the WSGI server. Also, configuration switches are not passed; rather, -environment variables are used: - -* CEPH_CONF holds -c/--conf -* CEPH_CLUSTER_NAME holds --cluster -* CEPH_NAME holds -n/--name - -Any errors reading configuration or connecting to the cluster cause -ImportError to be raised with a descriptive message on import; see -your WSGI server documentation for how to see those messages in case -of problem. - +the WSGI server. Use a python .wsgi module or the equivalent to call +``app = generate_app(conf, cluster, clientname, clientid, args)`` where: + +* conf is as -c/--conf above +* cluster is as --cluster above +* clientname, -n/--name +* clientid, -i/--id, and +* args are any other generic Ceph arguments + +When app is returned, it will have attributes 'ceph_addr' and 'ceph_port' +set to what the address and port are in the Ceph configuration; +those may be used for the server, or ignored. + +Any errors reading configuration or connecting to the cluster cause an +exception to be raised; see your WSGI server documentation for how to +see those messages in case of problem. Availability ============ diff --git a/doc/release-notes.rst b/doc/release-notes.rst index b71e2a0f1bf..9216ca9c192 100644 --- a/doc/release-notes.rst +++ b/doc/release-notes.rst @@ -2,7 +2,7 @@ Release Notes =============== -v0.67-rc1 +v0.67-rc2 --------- This is a release candidate for v0.67, code-named "Dumpling." Any @@ -19,8 +19,8 @@ The headline features for this release include: * Object namespaces in librados. -Upgrading from v0.66 -~~~~~~~~~~~~~~~~~~~~~ +Upgrading from v0.66 +~~~~~~~~~~~~~~~~~~~~ * There is monitor internal protocol change, which means that v0.67 ceph-mon daemons cannot talk to v0.66 or older daemons. We @@ -287,6 +287,42 @@ Notable Changes * misc code cleanups +v0.61.7 "Cuttlefish" +-------------------- + +This release fixes another regression preventing monitors to start after +undergoing certain upgrade sequences, as well as some corner cases with +Paxos and support for unusual device names in ceph-disk/ceph-deploy. + +Notable Changes +~~~~~~~~~~~~~~~ + +* mon: fix regression in latest full osdmap retrieval +* mon: fix a long-standing bug in a paxos corner case +* ceph-disk: improved support for unusual device names (e.g., /dev/cciss/c0d0) + +For more detailed information, see :download:`the complete changelog <changelog/v0.61.7.txt>`. + + +v0.61.6 "Cuttlefish" +-------------------- + +This release fixes a regression in v0.61.5 that could prevent monitors +from restarting. This affects any cluster that was upgraded from a +previous version of Ceph (and not freshly created with v0.61.5). + +All users are strongly recommended to upgrade. + +Notable Changes +~~~~~~~~~~~~~~~ + +* mon: record latest full osdmap +* mon: work around previous bug in which latest full osdmap is not recorded +* mon: avoid scrub while updating + +For more detailed information, see :download:`the complete changelog <changelog/v0.61.6.txt>`. + + v0.61.5 "Cuttlefish" -------------------- diff --git a/man/ceph-authtool.8 b/man/ceph-authtool.8 index 47888af1f22..e64cac95f0a 100644 --- a/man/ceph-authtool.8 +++ b/man/ceph-authtool.8 @@ -69,16 +69,33 @@ will create a new keyring, overwriting any existing keyringfile .UNINDENT .INDENT 0.0 .TP -.B \-\-gen\-key +.B \-g, \-\-gen\-key will generate a new secret key for the specified entityname .UNINDENT .INDENT 0.0 .TP -.B \-\-add\-key +.B \-a, \-\-add\-key will add an encoded key to the keyring .UNINDENT .INDENT 0.0 .TP +.B \-u, \-\-set\-uid +sets the auid (authenticated user id) for the specified entityname +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-gen\-print\-key +will generate and print a new secret key without adding it to the keyringfile + +NOTE: will work without a given keyringfile +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-import\-keyring +will import the content of a given keyring into the keyringfile +.UNINDENT +.INDENT 0.0 +.TP .B \-\-cap subsystem capability will set the capability for given subsystem .UNINDENT diff --git a/man/ceph-rest-api.8 b/man/ceph-rest-api.8 index 33425fecc00..5170b7f37cf 100644 --- a/man/ceph-rest-api.8 +++ b/man/ceph-rest-api.8 @@ -1,4 +1,4 @@ -.TH "CEPH-REST-API" "8" "July 12, 2013" "dev" "Ceph" +.TH "CEPH-REST-API" "8" "July 26, 2013" "dev" "Ceph" .SH NAME ceph-rest-api \- ceph RESTlike administration server . @@ -32,7 +32,7 @@ level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] . .SH SYNOPSIS .nf -\fBceph\-rest\-api\fP [ \-c \fIconffile\fP ] [ \-n \fIname\fP ... ] +\fBceph\-rest\-api\fP [ \-c \fIconffile\fP ] [\-\-cluster \fIclustername\fP ] [ \-n \fIname\fP ] [\-i \fIid\fP ] .fi .sp .SH DESCRIPTION @@ -44,7 +44,7 @@ command\-line tool through an HTTP\-accessible interface. .SH OPTIONS .INDENT 0.0 .TP -.B \-c/\-\-conf *conffile* +.B \-c/\-\-conf conffile names the ceph.conf file to use for configuration. If \-c is not specified, the default depends on the state of the \-\-cluster option (default \(aqceph\(aq; see below). The configuration file is searched @@ -64,43 +64,54 @@ so you can also pass this option in the environment as CEPH_CONF. .UNINDENT .INDENT 0.0 .TP -.B \-\-cluster *clustername* +.B \-\-cluster clustername set \fIclustername\fP for use in the $cluster metavariable, for locating the ceph.conf file. The default is \(aqceph\(aq. -You can also pass this option in the environment as -CEPH_CLUSTER_NAME. .UNINDENT .INDENT 0.0 .TP -.B \-n/\-\-name *name* +.B \-n/\-\-name name specifies the client \(aqname\(aq, which is used to find the client\-specific configuration options in the config file, and also is the name used for authentication when connecting to the cluster (the entity name appearing in ceph auth list output, -for example). The default is \(aqclient.restapi\(aq. You can also -pass this option in the environment as CEPH_NAME. +for example). The default is \(aqclient.restapi\(aq. +.UNINDENT +.INDENT 0.0 +.TP +.B \-i/\-\-id id +specifies the client \(aqid\(aq, which will form the clientname +as \(aqclient.<id>\(aq if clientname is not set. If \-n/\-name is +set, that takes precedence. +.sp +Also, global Ceph options are supported. .UNINDENT .SH CONFIGURATION PARAMETERS .sp Supported configuration parameters include: .INDENT 0.0 .IP \(bu 2 -\fBrestapi client name\fP the \(aqclientname\(aq used for auth and ceph.conf +\fBkeyring\fP the keyring file holding the key for \(aqclientname\(aq .IP \(bu 2 -\fBrestapi keyring\fP the keyring file holding the key for \(aqclientname\(aq +\fBpublic addr\fP ip:port to listen on (default 0.0.0.0:5000) .IP \(bu 2 -\fBrestapi public addr\fP ip:port to listen on (default 0.0.0.0:5000) +\fBlog file\fP (usual Ceph default) .IP \(bu 2 \fBrestapi base url\fP the base URL to answer requests on (default /api/v0.1) .IP \(bu 2 -\fBrestapi log level\fP critical, error, warning, info, debug -.IP \(bu 2 -\fBrestapi log file\fP (default /var/local/ceph/<clientname>.log) +\fBrestapi log level\fP critical, error, warning, info, debug (default warning) .UNINDENT .sp -A server will run on \fBrestapi public addr\fP if the ceph\-rest\-api -executed directly; otherwise, configuration is specified by the -enclosing WSGI web server. +Configuration parameters are searched in the standard order: +first in the section named \(aq<clientname>\(aq, then \(aqclient\(aq, then \(aqglobal\(aq. +.sp +<clientname> is either supplied by \-n/\-\-name, "client.<id>" where +<id> is supplied by \-i/\-\-id, or \(aqclient.restapi\(aq if neither option +is present. +.sp +A single\-threaded server will run on \fBpublic addr\fP if the ceph\-rest\-api +executed directly; otherwise, configuration is specified by the enclosing +WSGI web server. .SH COMMANDS .sp Commands are submitted with HTTP GET requests (for commands that @@ -122,28 +133,37 @@ with a small description of each command, is provided when the requested path is incomplete/partially matching. Requesting / will redirect to the value of \fBrestapi base url\fP, and that path will give a full list of all known commands. The command set is very similar to the commands -supported by the \fBceph\fP tool. +supported by the \fBceph\fP tool. One notable exception is that the +\fBceph pg <pgid> <command>\fP style of commands is supported here +as \fBtell/<pgid>/command?args\fP. .SH DEPLOYMENT AS WSGI APPLICATION .sp When deploying as WSGI application (say, with Apache/mod_wsgi, or nginx/uwsgi, or gunicorn, etc.), use the \fBceph_rest_api.py\fP module (\fBceph\-rest\-api\fP is a thin layer around this module). The standalone web server is of course not used, so address/port configuration is done in -the WSGI server. Also, configuration switches are not passed; rather, -environment variables are used: +the WSGI server. Use a python .wsgi module or the equivalent to call +\fBapp = generate_app(conf, cluster, clientname, clientid, args)\fP where: .INDENT 0.0 .IP \(bu 2 -CEPH_CONF holds \-c/\-\-conf +conf is as \-c/\-\-conf above +.IP \(bu 2 +cluster is as \-\-cluster above .IP \(bu 2 -CEPH_CLUSTER_NAME holds \-\-cluster +clientname, \-n/\-\-name .IP \(bu 2 -CEPH_NAME holds \-n/\-\-name +clientid, \-i/\-\-id, and +.IP \(bu 2 +args are any other generic Ceph arguments .UNINDENT .sp -Any errors reading configuration or connecting to the cluster cause -ImportError to be raised with a descriptive message on import; see -your WSGI server documentation for how to see those messages in case -of problem. +When app is returned, it will have attributes \(aqceph_addr\(aq and \(aqceph_port\(aq +set to what the address and port are in the Ceph configuration; +those may be used for the server, or ignored. +.sp +Any errors reading configuration or connecting to the cluster cause an +exception to be raised; see your WSGI server documentation for how to +see those messages in case of problem. .SH AVAILABILITY .sp \fBceph\-rest\-api\fP is part of the Ceph distributed file system. Please refer to the Ceph documentation at diff --git a/qa/fs/.gitignore b/qa/fs/.gitignore new file mode 100644 index 00000000000..a2d280e0794 --- /dev/null +++ b/qa/fs/.gitignore @@ -0,0 +1 @@ +/test_o_trunc diff --git a/qa/workunits/rest/test.py b/qa/workunits/rest/test.py index 272760c8289..f0cf1f2c761 100755 --- a/qa/workunits/rest/test.py +++ b/qa/workunits/rest/test.py @@ -1,8 +1,8 @@ #!/usr/bin/python import exceptions +import json import os -# import nosetests import requests import subprocess import sys @@ -43,9 +43,10 @@ def expect(url, method, respcode, contenttype, extra_hdrs=None, data=None): if contenttype.startswith('application'): if r_contenttype == 'application/json': - # may raise try: - assert(r.json != None) + # older requests.py doesn't create r.myjson; create it myself + r.myjson = json.loads(r.content) + assert(r.myjson != None) except Exception as e: fail(r, 'Invalid JSON returned: "{0}"'.format(str(e))) @@ -71,7 +72,7 @@ if __name__ == '__main__': expect('auth/export', 'GET', 200, 'xml', XMLHDR) expect('auth/add?entity=client.xx&' - 'caps=mon&caps=allow&caps=osd&caps=allow *', 'PUT', 200, 'json', + 'caps=mon&caps=allow&caps=osd&caps=allow+*', 'PUT', 200, 'json', JSONHDR) r = expect('auth/export?entity=client.xx', 'GET', 200, 'plain') @@ -83,7 +84,7 @@ if __name__ == '__main__': assert('client.xx' in r.content) r = expect('auth/list.json', 'GET', 200, 'json') - dictlist = r.json['output']['auth_dump'] + dictlist = r.myjson['output']['auth_dump'] xxdict = [d for d in dictlist if d['entity'] == 'client.xx'][0] assert(xxdict) assert('caps' in xxdict) @@ -94,10 +95,10 @@ if __name__ == '__main__': expect('auth/print-key?entity=client.xx', 'GET', 200, 'json', JSONHDR) expect('auth/print_key?entity=client.xx', 'GET', 200, 'json', JSONHDR) - expect('auth/caps?entity=client.xx&caps=osd&caps=allow rw', 'PUT', 200, + expect('auth/caps?entity=client.xx&caps=osd&caps=allow+rw', 'PUT', 200, 'json', JSONHDR) r = expect('auth/list.json', 'GET', 200, 'json') - dictlist = r.json['output']['auth_dump'] + dictlist = r.myjson['output']['auth_dump'] xxdict = [d for d in dictlist if d['entity'] == 'client.xx'][0] assert(xxdict) assert('caps' in xxdict) @@ -116,7 +117,7 @@ if __name__ == '__main__': expect('auth/del?entity=client.xx', 'PUT', 200, 'json', JSONHDR) r = expect('osd/dump', 'GET', 200, 'json', JSONHDR) - assert('epoch' in r.json['output']) + assert('epoch' in r.myjson['output']) assert('GLOBAL' in expect('df', 'GET', 200, 'plain').content) assert('CATEGORY' in expect('df?detail=detail', 'GET', 200, 'plain').content) @@ -124,12 +125,12 @@ if __name__ == '__main__': assert('CATEGORY' in expect('df?detail', 'GET', 200, 'plain').content) r = expect('df', 'GET', 200, 'json', JSONHDR) - assert('total_used' in r.json['output']['stats']) + assert('total_used' in r.myjson['output']['stats']) r = expect('df', 'GET', 200, 'xml', XMLHDR) assert(r.tree.find('output/stats/stats/total_used') is not None) r = expect('df?detail', 'GET', 200, 'json', JSONHDR) - assert('rd_kb' in r.json['output']['pools'][0]['stats']) + assert('rd_kb' in r.myjson['output']['pools'][0]['stats']) r = expect('df?detail', 'GET', 200, 'xml', XMLHDR) assert(r.tree.find('output/stats/pools/pool/stats/rd_kb') is not None) @@ -149,7 +150,7 @@ if __name__ == '__main__': expect('mds/compat/rm_incompat?feature=4', 'PUT', 200, '') r = expect('mds/compat/show', 'GET', 200, 'json', JSONHDR) - assert('incompat' in r.json['output']) + assert('incompat' in r.myjson['output']) r = expect('mds/compat/show', 'GET', 200, 'xml', XMLHDR) assert(r.tree.find('output/mds_compat/incompat') is not None) @@ -157,8 +158,8 @@ if __name__ == '__main__': expect('mds/deactivate?who=2', 'PUT', 400, '') r = expect('mds/dump.json', 'GET', 200, 'json') - assert('created' in r.json['output']) - current_epoch = r.json['output']['epoch'] + assert('created' in r.myjson['output']) + current_epoch = r.myjson['output']['epoch'] r = expect('mds/dump.xml', 'GET', 200, 'xml') assert(r.tree.find('output/mdsmap/created') is not None) @@ -171,7 +172,7 @@ if __name__ == '__main__': 200, '') expect('osd/pool/create?pool=data2&pg_num=10', 'PUT', 200, '') r = expect('osd/dump', 'GET', 200, 'json', JSONHDR) - pools = r.json['output']['pools'] + pools = r.myjson['output']['pools'] poolnum = None for p in pools: if p['pool_name'] == 'data2': @@ -185,12 +186,12 @@ if __name__ == '__main__': '&sure=--yes-i-really-really-mean-it', 'PUT', 200, '') expect('mds/set_max_mds?maxmds=4', 'PUT', 200, '') r = expect('mds/dump.json', 'GET', 200, 'json') - assert(r.json['output']['max_mds'] == 4) + assert(r.myjson['output']['max_mds'] == 4) expect('mds/set_max_mds?maxmds=3', 'PUT', 200, '') r = expect('mds/stat.json', 'GET', 200, 'json') - assert('info' in r.json['output']) + assert('info' in r.myjson['output']['mdsmap']) r = expect('mds/stat.xml', 'GET', 200, 'xml') - assert(r.tree.find('output/mdsmap/info') is not None) + assert(r.tree.find('output/mds_stat/mdsmap/info') is not None) # more content tests below, just check format here expect('mon/dump.json', 'GET', 200, 'json') @@ -199,17 +200,17 @@ if __name__ == '__main__': r = expect('mon/getmap', 'GET', 200, '') assert(len(r.content) != 0) r = expect('mon_status.json', 'GET', 200, 'json') - assert('name' in r.json['output']) + assert('name' in r.myjson['output']) r = expect('mon_status.xml', 'GET', 200, 'xml') assert(r.tree.find('output/mon_status/name') is not None) bl = '192.168.0.1:0/1000' expect('osd/blacklist?blacklistop=add&addr=' + bl, 'PUT', 200, '') r = expect('osd/blacklist/ls.json', 'GET', 200, 'json') - assert([b for b in r.json['output'] if b['addr'] == bl]) + assert([b for b in r.myjson['output'] if b['addr'] == bl]) expect('osd/blacklist?blacklistop=rm&addr=' + bl, 'PUT', 200, '') r = expect('osd/blacklist/ls.json', 'GET', 200, 'json') - assert([b for b in r.json['output'] if b['addr'] == bl] == []) + assert([b for b in r.myjson['output'] if b['addr'] == bl] == []) expect('osd/crush/tunables?profile=legacy', 'PUT', 200, '') expect('osd/crush/tunables?profile=bobtail', 'PUT', 200, '') @@ -222,73 +223,73 @@ if __name__ == '__main__': expect('osd/down?ids=0', 'PUT', 200, '') r = expect('osd/dump', 'GET', 200, 'json', JSONHDR) - assert(r.json['output']['osds'][0]['osd'] == 0) - assert(r.json['output']['osds'][0]['up'] == 0) + assert(r.myjson['output']['osds'][0]['osd'] == 0) + assert(r.myjson['output']['osds'][0]['up'] == 0) expect('osd/unset?key=noup', 'PUT', 200, '') for i in range(0,100): r = expect('osd/dump', 'GET', 200, 'json', JSONHDR) - assert(r.json['output']['osds'][0]['osd'] == 0) - if r.json['output']['osds'][0]['up'] == 1: + assert(r.myjson['output']['osds'][0]['osd'] == 0) + if r.myjson['output']['osds'][0]['up'] == 1: break else: print >> sys.stderr, "waiting for osd.0 to come back up" time.sleep(10) r = expect('osd/dump', 'GET', 200, 'json', JSONHDR) - assert(r.json['output']['osds'][0]['osd'] == 0) - assert(r.json['output']['osds'][0]['up'] == 1) + assert(r.myjson['output']['osds'][0]['osd'] == 0) + assert(r.myjson['output']['osds'][0]['up'] == 1) r = expect('osd/find?id=1', 'GET', 200, 'json', JSONHDR) - assert(r.json['output']['osd'] == 1) + assert(r.myjson['output']['osd'] == 1) expect('osd/out?ids=1', 'PUT', 200, '') r = expect('osd/dump', 'GET', 200, 'json', JSONHDR) - assert(r.json['output']['osds'][1]['osd'] == 1) - assert(r.json['output']['osds'][1]['in'] == 0) + assert(r.myjson['output']['osds'][1]['osd'] == 1) + assert(r.myjson['output']['osds'][1]['in'] == 0) expect('osd/in?ids=1', 'PUT', 200, '') r = expect('osd/dump', 'GET', 200, 'json', JSONHDR) - assert(r.json['output']['osds'][1]['osd'] == 1) - assert(r.json['output']['osds'][1]['in'] == 1) + assert(r.myjson['output']['osds'][1]['osd'] == 1) + assert(r.myjson['output']['osds'][1]['in'] == 1) r = expect('osd/find?id=0', 'GET', 200, 'json', JSONHDR) - assert(r.json['output']['osd'] == 0) + assert(r.myjson['output']['osd'] == 0) r = expect('osd/getmaxosd', 'GET', 200, 'xml', XMLHDR) assert(r.tree.find('output/getmaxosd/max_osd') is not None) r = expect('osd/getmaxosd', 'GET', 200, 'json', JSONHDR) - saved_maxosd = r.json['output']['max_osd'] + saved_maxosd = r.myjson['output']['max_osd'] expect('osd/setmaxosd?newmax=10', 'PUT', 200, '') r = expect('osd/getmaxosd', 'GET', 200, 'json', JSONHDR) - assert(r.json['output']['max_osd'] == 10) + assert(r.myjson['output']['max_osd'] == 10) expect('osd/setmaxosd?newmax={0}'.format(saved_maxosd), 'PUT', 200, '') r = expect('osd/getmaxosd', 'GET', 200, 'json', JSONHDR) - assert(r.json['output']['max_osd'] == saved_maxosd) + assert(r.myjson['output']['max_osd'] == saved_maxosd) r = expect('osd/create', 'PUT', 200, 'json', JSONHDR) - assert('osdid' in r.json['output']) - osdid = r.json['output']['osdid'] + assert('osdid' in r.myjson['output']) + osdid = r.myjson['output']['osdid'] expect('osd/lost?id={0}'.format(osdid), 'PUT', 400, '') expect('osd/lost?id={0}&sure=--yes-i-really-mean-it'.format(osdid), 'PUT', 200, 'json', JSONHDR) expect('osd/rm?ids={0}'.format(osdid), 'PUT', 200, '') r = expect('osd/ls', 'GET', 200, 'json', JSONHDR) - assert(isinstance(r.json['output'], list)) + assert(isinstance(r.myjson['output'], list)) r = expect('osd/ls', 'GET', 200, 'xml', XMLHDR) assert(r.tree.find('output/osds/osd') is not None) expect('osd/pause', 'PUT', 200, '') r = expect('osd/dump', 'GET', 200, 'json', JSONHDR) - assert('pauserd,pausewr' in r.json['output']['flags']) + assert('pauserd,pausewr' in r.myjson['output']['flags']) expect('osd/unpause', 'PUT', 200, '') r = expect('osd/dump', 'GET', 200, 'json', JSONHDR) - assert('pauserd,pausewr' not in r.json['output']['flags']) + assert('pauserd,pausewr' not in r.myjson['output']['flags']) r = expect('osd/tree', 'GET', 200, 'json', JSONHDR) - assert('nodes' in r.json['output']) + assert('nodes' in r.myjson['output']) r = expect('osd/tree', 'GET', 200, 'xml', XMLHDR) assert(r.tree.find('output/tree/nodes') is not None) expect('osd/pool/mksnap?pool=data&snap=datasnap', 'PUT', 200, '') @@ -297,21 +298,21 @@ if __name__ == '__main__': expect('osd/pool/rmsnap?pool=data&snap=datasnap', 'PUT', 200, '') expect('osd/pool/create?pool=data2&pg_num=10', 'PUT', 200, '') - r = expect('osd/lspools', 'PUT', 200, 'json', JSONHDR) - assert([p for p in r.json['output'] if p['poolname'] == 'data2']) + r = expect('osd/lspools', 'GET', 200, 'json', JSONHDR) + assert([p for p in r.myjson['output'] if p['poolname'] == 'data2']) expect('osd/pool/rename?srcpool=data2&destpool=data3', 'PUT', 200, '') - r = expect('osd/lspools', 'PUT', 200, 'json', JSONHDR) - assert([p for p in r.json['output'] if p['poolname'] == 'data3']) + r = expect('osd/lspools', 'GET', 200, 'json', JSONHDR) + assert([p for p in r.myjson['output'] if p['poolname'] == 'data3']) expect('osd/pool/delete?pool=data3', 'PUT', 400, '') expect('osd/pool/delete?pool=data3&pool2=data3&sure=--yes-i-really-really-mean-it', 'PUT', 200, '') r = expect('osd/stat', 'GET', 200, 'json', JSONHDR) - assert('num_up_osds' in r.json['output']) + assert('num_up_osds' in r.myjson['output']) r = expect('osd/stat', 'GET', 200, 'xml', XMLHDR) assert(r.tree.find('output/osdmap/num_up_osds') is not None) r = expect('osd/ls', 'GET', 200, 'json', JSONHDR) - for osdid in r.json['output']: + for osdid in r.myjson['output']: # XXX no tell yet # expect('tell?target=osd.{0}&args=version'.format(osdid), 'PUT', # 200, '') @@ -321,7 +322,7 @@ if __name__ == '__main__': expect('pg/debug?debugop=degraded_pgs_exist', 'GET', 200, '') expect('pg/deep-scrub?pgid=0.0', 'PUT', 200, '') r = expect('pg/dump', 'GET', 200, 'json', JSONHDR) - assert('pg_stats_sum' in r.json['output']) + assert('pg_stats_sum' in r.myjson['output']) r = expect('pg/dump', 'GET', 200, 'xml', XMLHDR) assert(r.tree.find('output/pg_map/pg_stats_sum') is not None) @@ -335,8 +336,8 @@ if __name__ == '__main__': assert(len(r.content) != 0) r = expect('pg/map?pgid=0.0', 'GET', 200, 'json', JSONHDR) - assert('acting' in r.json['output']) - assert(r.json['output']['pgid'] == '0.0') + assert('acting' in r.myjson['output']) + assert(r.myjson['output']['pgid'] == '0.0') r = expect('pg/map?pgid=0.0', 'GET', 200, 'xml', XMLHDR) assert(r.tree.find('output/pg_map/acting') is not None) assert(r.tree.find('output/pg_map/pgid').text == '0.0') @@ -348,15 +349,15 @@ if __name__ == '__main__': expect('pg/set_full_ratio?ratio=0.90', 'PUT', 200, '') r = expect('pg/dump', 'GET', 200, 'json', JSONHDR) - assert(float(r.json['output']['full_ratio']) == 0.90) + assert(float(r.myjson['output']['full_ratio']) == 0.90) expect('pg/set_full_ratio?ratio=0.95', 'PUT', 200, '') expect('pg/set_nearfull_ratio?ratio=0.90', 'PUT', 200, '') r = expect('pg/dump', 'GET', 200, 'json', JSONHDR) - assert(float(r.json['output']['near_full_ratio']) == 0.90) + assert(float(r.myjson['output']['near_full_ratio']) == 0.90) expect('pg/set_full_ratio?ratio=0.85', 'PUT', 200, '') r = expect('pg/stat', 'GET', 200, 'json', JSONHDR) - assert('pg_stats_sum' in r.json['output']) + assert('pg_stats_sum' in r.myjson['output']) r = expect('pg/stat', 'GET', 200, 'xml', XMLHDR) assert(r.tree.find('output/pg_map/pg_stats_sum') is not None) @@ -367,12 +368,12 @@ if __name__ == '__main__': # report's CRC needs to be handled # r = expect('report', 'GET', 200, 'json', JSONHDR) - # assert('osd_stats' in r.json['output']) + # assert('osd_stats' in r.myjson['output']) # r = expect('report', 'GET', 200, 'xml', XMLHDR) # assert(r.tree.find('output/report/osdmap') is not None) r = expect('status', 'GET', 200, 'json', JSONHDR) - assert('osdmap' in r.json['output']) + assert('osdmap' in r.myjson['output']) r = expect('status', 'GET', 200, 'xml', XMLHDR) assert(r.tree.find('output/status/osdmap') is not None) @@ -393,21 +394,21 @@ if __name__ == '__main__': for v in ['pg_num', 'pgp_num', 'size', 'min_size', 'crash_replay_interval', 'crush_ruleset']: r = expect('osd/pool/get.json?pool=data&var=' + v, 'GET', 200, 'json') - assert(v in r.json['output']) + assert(v in r.myjson['output']) r = expect('osd/pool/get.json?pool=data&var=size', 'GET', 200, 'json') - assert(r.json['output']['size'] == 2) + assert(r.myjson['output']['size'] == 2) expect('osd/pool/set?pool=data&var=size&val=3', 'PUT', 200, 'plain') r = expect('osd/pool/get.json?pool=data&var=size', 'GET', 200, 'json') - assert(r.json['output']['size'] == 3) + assert(r.myjson['output']['size'] == 3) expect('osd/pool/set?pool=data&var=size&val=2', 'PUT', 200, 'plain') r = expect('osd/pool/get.json?pool=data&var=size', 'GET', 200, 'json') - assert(r.json['output']['size'] == 2) + assert(r.myjson['output']['size'] == 2) r = expect('osd/pool/get.json?pool=rbd&var=crush_ruleset', 'GET', 200, 'json') - assert(r.json['output']['crush_ruleset'] == 2) + assert(r.myjson['output']['crush_ruleset'] == 2) expect('osd/thrash?num_epochs=10', 'PUT', 200, '') print 'OK' diff --git a/qa/workunits/suites/fsync-tester.sh b/qa/workunits/suites/fsync-tester.sh index bdb7e583846..b056e3be0d6 100755 --- a/qa/workunits/suites/fsync-tester.sh +++ b/qa/workunits/suites/fsync-tester.sh @@ -6,3 +6,6 @@ wget http://ceph.com/qa/fsync-tester.c gcc fsync-tester.c -o fsync-tester ./fsync-tester + +lsof + diff --git a/src/Makefile.am b/src/Makefile.am index 307abf2f965..93f33312508 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -504,7 +504,7 @@ ceph_radosacl_SOURCES = radosacl.cc ceph_radosacl_LDADD = librados.la $(PTHREAD_LIBS) -lm $(CRYPTO_LIBS) $(EXTRALIBS) bin_DEBUGPROGRAMS += ceph_scratchtool ceph_scratchtoolpp ceph_radosacl -rbd_SOURCES = rbd.cc common/fiemap.cc common/secret.c common/TextTable.cc common/util.cc +rbd_SOURCES = rbd.cc common/secret.c common/TextTable.cc common/util.cc rbd_CXXFLAGS = ${AM_CXXFLAGS} rbd_LDADD = librbd.la librados.la $(LIBGLOBAL_LDA) -lkeyutils if LINUX @@ -762,6 +762,11 @@ unittest_addrs_CXXFLAGS = ${AM_CXXFLAGS} ${UNITTEST_CXXFLAGS} unittest_addrs_LDADD = $(LIBGLOBAL_LDA) ${UNITTEST_LDADD} check_PROGRAMS += unittest_addrs +unittest_sharedptr_registry_SOURCES = test/common/test_sharedptr_registry.cc +unittest_sharedptr_registry_CXXFLAGS = ${AM_CXXFLAGS} ${UNITTEST_CXXFLAGS} +unittest_sharedptr_registry_LDADD = libcommon.la ${LIBGLOBAL_LDA} ${UNITTEST_LDADD} +check_PROGRAMS += unittest_sharedptr_registry + unittest_util_SOURCES = test/common/test_util.cc common/util.cc unittest_util_CXXFLAGS = ${AM_CXXFLAGS} ${UNITTEST_CXXFLAGS} unittest_util_LDADD = libcommon.la $(PTHREAD_LIBS) -lm ${UNITTEST_LDADD} $(CRYPTO_LIBS) $(EXTRALIBS) @@ -1895,7 +1900,6 @@ noinst_HEADERS = \ include/encoding.h\ include/err.h\ include/error.h\ - include/fiemap.h\ include/filepath.h\ include/frag.h\ include/hash.h\ diff --git a/src/ceph-disk b/src/ceph-disk index db988b0d5e3..77a9d9a2612 100755 --- a/src/ceph-disk +++ b/src/ceph-disk @@ -182,7 +182,7 @@ class FilesystemTypeError(Error): def maybe_mkdir(*a, **kw): """ - Creates a new directory if it doesn't exist, removes + Creates a new directory if it doesn't exist, removes existing symlink before creating the directory. """ # remove any symlink, if it is there.. @@ -495,7 +495,7 @@ def _check_output(*args, **kwargs): def get_conf(cluster, variable): """ - Get the value of the given configuration variable from the + Get the value of the given configuration variable from the cluster. :raises: Error if call to ceph-conf fails. @@ -654,7 +654,7 @@ def mount( options, ): """ - Mounts a device with given filessystem type and + Mounts a device with given filessystem type and mount options to a tempfile path under /var/lib/ceph/tmp. """ # pick best-of-breed mount options based on fs type @@ -1307,18 +1307,37 @@ def auth_key( osd_id, keyring, ): - subprocess.check_call( - args=[ - '/usr/bin/ceph', - '--cluster', cluster, - '--name', 'client.bootstrap-osd', - '--keyring', keyring, - 'auth', 'add', 'osd.{osd_id}'.format(osd_id=osd_id), - '-i', os.path.join(path, 'keyring'), - 'osd', 'allow *', - 'mon', 'allow rwx', - ], - ) + try: + # try dumpling+ cap scheme + subprocess.check_call( + args=[ + '/usr/bin/ceph', + '--cluster', cluster, + '--name', 'client.bootstrap-osd', + '--keyring', keyring, + 'auth', 'add', 'osd.{osd_id}'.format(osd_id=osd_id), + '-i', os.path.join(path, 'keyring'), + 'osd', 'allow *', + 'mon', 'allow profile osd', + ], + ) + except subprocess.CalledProcessError as err: + if err.errno == errno.EACCES: + # try old cap scheme + subprocess.check_call( + args=[ + '/usr/bin/ceph', + '--cluster', cluster, + '--name', 'client.bootstrap-osd', + '--keyring', keyring, + 'auth', 'add', 'osd.{osd_id}'.format(osd_id=osd_id), + '-i', os.path.join(path, 'keyring'), + 'osd', 'allow *', + 'mon', 'allow rwx', + ], + ) + else: + raise def move_mount( @@ -1698,7 +1717,7 @@ def main_activate(args): ) else: raise Error('%s is not a directory or block device', args.path) - + start_daemon( cluster=cluster, osd_id=osd_id, @@ -1951,7 +1970,7 @@ def list_dev(dev, uuid_map, journal_map): print '%s%s %s' % (prefix, dev, ', '.join(desc)) - + def main_list(args): partmap = list_all_partitions() @@ -1960,7 +1979,7 @@ def main_list(args): journal_map = {} for base, parts in sorted(partmap.iteritems()): for p in parts: - dev = '/dev/' + p + dev = get_dev_path(p) part_uuid = get_partition_uuid(dev) if part_uuid: uuid_map[part_uuid] = dev @@ -1980,11 +1999,11 @@ def main_list(args): for base, parts in sorted(partmap.iteritems()): if parts: - print '/dev/%s :' % base + print '%s :' % get_dev_path(base) for p in sorted(parts): - list_dev('/dev/' + p, uuid_map, journal_map) + list_dev(get_dev_path(p), uuid_map, journal_map) else: - list_dev('/dev/' + base, uuid_map, journal_map) + list_dev(get_dev_path(base), uuid_map, journal_map) ########################### diff --git a/src/ceph-rest-api b/src/ceph-rest-api index a44919acd6f..ae5245b4f76 100755 --- a/src/ceph-rest-api +++ b/src/ceph-rest-api @@ -33,34 +33,35 @@ def parse_args(): parser.add_argument('-c', '--conf', help='Ceph configuration file') parser.add_argument('--cluster', help='Ceph cluster name') parser.add_argument('-n', '--name', help='Ceph client name') + parser.add_argument('-i', '--id', help='Ceph client id') - return parser.parse_args() - + return parser.parse_known_args() # main -parsed_args = parse_args() -if parsed_args.conf: - os.environ['CEPH_CONF'] = parsed_args.conf -if parsed_args.cluster: - os.environ['CEPH_CLUSTER_NAME'] = parsed_args.cluster -if parsed_args.name: - os.environ['CEPH_NAME'] = parsed_args.name +parsed_args, rest = parse_args() # import now that env vars are available to imported module try: import ceph_rest_api -except Exception as e: +except EnvironmentError as e: print >> sys.stderr, "Error importing ceph_rest_api: ", str(e) sys.exit(1) -# importing ceph_rest_api has set module globals 'app', 'addr', and 'port' +# let other exceptions generate traceback + +app = ceph_rest_api.generate_app( + parsed_args.conf, + parsed_args.cluster, + parsed_args.name, + parsed_args.id, + rest, +) files = [os.path.split(fr[1])[-1] for fr in inspect.stack()] if 'pdb.py' in files: - ceph_rest_api.app.run(host=ceph_rest_api.addr, port=ceph_rest_api.port, - debug=True, use_reloader=False, use_debugger=False) + app.run(host=app.ceph_addr, port=app.ceph_port, + debug=True, use_reloader=False, use_debugger=False) else: - ceph_rest_api.app.run(host=ceph_rest_api.addr, port=ceph_rest_api.port, - debug=True) + app.run(host=app.ceph_addr, port=app.ceph_port) diff --git a/src/ceph.in b/src/ceph.in index e6806786e7e..22c54a48689 100755 --- a/src/ceph.in +++ b/src/ceph.in @@ -64,6 +64,9 @@ cluster_handle = None def osdids(): ret, outbuf, outs = json_command(cluster_handle, prefix='osd ls') + if ret == -errno.EINVAL: + # try old mon + ret, outbuf, outs = send_command(cluster_handle, cmd=['osd', 'ls']) if ret: raise RuntimeError('Can\'t contact mon for osd list') return [i for i in outbuf.split('\n') if i != ''] @@ -71,6 +74,10 @@ def osdids(): def monids(): ret, outbuf, outs = json_command(cluster_handle, prefix='mon dump', argdict={'format':'json'}) + if ret == -errno.EINVAL: + # try old mon + ret, outbuf, outs = send_command(cluster_handle, + cmd=['mon', 'dump', '--format=json']) if ret: raise RuntimeError('Can\'t contact mon for mon list') d = json.loads(outbuf) @@ -79,6 +86,10 @@ def monids(): def mdsids(): ret, outbuf, outs = json_command(cluster_handle, prefix='mds dump', argdict={'format':'json'}) + if ret == -errno.EINVAL: + # try old mon + ret, outbuf, outs = send_command(cluster_handle, + cmd=['mds', 'dump', '--format=json']) if ret: raise RuntimeError('Can\'t contact mon for mds list') d = json.loads(outbuf) @@ -100,8 +111,6 @@ def parse_cmdargs(args=None, target=''): parser.add_argument('-h', '--help', help='request mon help', action='store_true') - parser.add_argument('--help-all', help='request help for all daemons', - action='store_true') parser.add_argument('-c', '--conf', dest='cephconf', help='ceph configuration file') @@ -150,14 +159,16 @@ def parse_cmdargs(args=None, target=''): return parser, parsed_args, extras -def do_help(parser, args, help_all = False): +def do_help(parser, args): """ Print basic parser help - If the cluster is available: - get and print monitor help; - if help_all, print help for daemon commands as well + If the cluster is available, get and print monitor help """ + def help_for_sigs(sigs, partial=None): + sys.stdout.write(format_help(parse_json_funcsigs(sigs, 'cli'), + partial=partial)) + def help_for_target(target, partial=None): ret, outbuf, outs = json_command(cluster_handle, target=target, prefix='get_command_descriptions', @@ -167,40 +178,19 @@ def do_help(parser, args, help_all = False): "couldn't get command descriptions for {0}: {1}".\ format(target, outs) else: - sys.stdout.write(format_help(parse_json_funcsigs(outbuf, 'cli'), - partial)) + help_for_sigs(outbuf, partial) - parser.print_help() - print '\n' - if (cluster_handle): - help_for_target(target=('mon', ''), partial=' '.join(args)) - if help_all and cluster_handle: - # try/except in case there are no daemons of that type - try: - firstosd = osdids()[0] - print '\nOSD.{0} tell commands and pg pgid commands:\n\n'.\ - format(firstosd) - help_for_target(target=('osd', osdids()[0])) - - print '\nOSD daemon commands:\n\n' - sys.stdout.write(format_help(parse_json_funcsigs(admin_socket(ceph_conf('admin_socket', 'osd.' + firstosd), ['get_command_descriptions']), 'cli'))) - except: - pass + def hdr(s): + print '\n', s, '\n', '=' * len(s) - try: - firstmon = monids()[0] - print '\nmon.{0} daemon commands:\n\n'.format(firstmon) - sys.stdout.write(format_help(parse_json_funcsigs(admin_socket(ceph_conf('admin_socket', 'mon.' + firstmon), ['get_command_descriptions']), 'cli'))) - except: - pass + hdr('Monitor commands:') + partial = ' '.join(args) + parser.print_help() + print '\n' - try: - firstmds = mdsids()[0] - print '\nmds.{0} daemon commands:\n\n'.format(firstmds) - sys.stdout.write(format_help(parse_json_funcsigs(admin_socket(ceph_conf('admin_socket', 'mds.' + firstmds), ['get_command_descriptions']), 'cli'))) - except: - pass + if (cluster_handle): + help_for_target(target=('mon', ''), partial=partial) return 0 @@ -285,26 +275,57 @@ def format_help(cmddict, partial=None): return fullusage -def admin_socket(asok_path, cmd): - sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) +def admin_socket(asok_path, cmd, format=''): + """ + Send a daemon (--admin-daemon) command 'cmd'. asok_path is the + path to the admin socket; cmd is a list of strings; format may be + set to one of the formatted forms to get output in that form + (daemon commands don't support 'plain' output). + """ + + def do_sockio(path, cmd): + """ helper: do all the actual low-level stream I/O """ + sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) + sock.connect(path) + try: + sock.sendall(cmd + '\0') + len_str = sock.recv(4) + if len(len_str) < 4: + raise RuntimeError("no data returned from admin socket") + l, = struct.unpack(">I", len_str) + ret = '' + + got = 0 + while got < l: + bit = sock.recv(l - got) + ret += bit + got += len(bit) + + except Exception as e: + raise RuntimeError('exception: ' + str(e)) + return ret + try: - sock.connect(asok_path) - sock.sendall(' '.join(cmd) + '\0') + cmd_json = do_sockio(asok_path, + json.dumps({"prefix":"get_command_descriptions"})) + except Exception as e: + raise RuntimeError('exception getting command descriptions: ' + str(e)) - len_str = sock.recv(4) - if len(len_str) < 4: - raise RuntimeError("no data returned from admin socket") - l, = struct.unpack(">I", len_str) - ret = '' + if cmd == 'get_command_descriptions': + return cmd_json - got = 0 - while got < l: - bit = sock.recv(l - got) - ret += bit - got += len(bit) + sigdict = parse_json_funcsigs(cmd_json, 'cli') + valid_dict = validate_command(sigdict, cmd) + if not valid_dict: + return -errno.EINVAL + if format: + valid_dict['format'] = format + + try: + ret = do_sockio(asok_path, json.dumps(valid_dict)) except Exception as e: - raise RuntimeError('exception: {0}'.format(e)) + raise RuntimeError('exception: ' + str(e)) return ret @@ -344,10 +365,11 @@ def new_style_command(parsed_args, cmdargs, target, sigdict, inbuf, verbose): if not got_command: if cmdargs: # Validate input args against list of sigs - valid_dict = validate_command(parsed_args, sigdict, cmdargs, - verbose) + valid_dict = validate_command(sigdict, cmdargs, verbose) if valid_dict: got_command = True + if parsed_args.output_format: + valid_dict['format'] = parsed_args.output_format else: return -errno.EINVAL, '', 'invalid command' else: @@ -360,8 +382,10 @@ def new_style_command(parsed_args, cmdargs, target, sigdict, inbuf, verbose): return 0, '', '' cmdargs = parse_cmdargs(interactive_input.split())[2] target = find_cmd_target(cmdargs) - valid_dict = validate_command(parsed_args, sigdict, cmdargs) + valid_dict = validate_command(sigdict, cmdargs, verbose) if valid_dict: + if parsed_args.output_format: + valid_dict['format'] = parsed_args.output_format if verbose: print >> sys.stderr, "Submitting command ", valid_dict ret, outbuf, outs = json_command(cluster_handle, @@ -378,45 +402,6 @@ def new_style_command(parsed_args, cmdargs, target, sigdict, inbuf, verbose): return json_command(cluster_handle, target=target, argdict=valid_dict, inbuf=inbuf) -OSD_TELL_MATCH = { \ - 'sig': ['tell', {'name':'target','type':'CephName'}], \ - 'matchnum': 2, \ - 'return_key': 'target', \ -} -PGID_MATCH = { \ - 'sig': ['pg', {'name':'pgid','type':'CephPgid'}], \ - 'matchnum': 2, \ - 'return_key': 'pgid', \ -} - -def find_cmd_target(childargs): - """ - Using a minimal validation, figure out whether the command - should be sent to a monitor or an osd. We do this before even - asking for the 'real' set of command signatures, so we can ask the - right daemon. - Returns ('osd', osdid), ('pg', pgid), or ('mon', '') - """ - sig = parse_funcsig(['tell', {'name':'target', 'type':'CephName'}]) - try: - valid_dict = validate(childargs, sig, partial=True) - if len(valid_dict) == 2: - name = CephName() - name.valid(valid_dict['target']) - return name.nametype, name.nameid - except ArgumentError: - pass - - sig = parse_funcsig(['pg', {'name':'pgid', 'type':'CephPgid'}]) - try: - valid_dict = validate(childargs, sig, partial=True) - if len(valid_dict) == 2: - return 'pg', valid_dict['pgid'] - except ArgumentError: - pass - - return 'mon', '' - def complete(sigdict, args, target): """ Command completion. Match as much of [args] as possible, @@ -509,9 +494,12 @@ def main(): conffile = parsed_args.cephconf # For now, --admin-daemon is handled as usual. Try it # first in case we can't connect() to the cluster + + format = parsed_args.output_format + if parsed_args.admin_socket: try: - print admin_socket(parsed_args.admin_socket, childargs) + print admin_socket(parsed_args.admin_socket, childargs, format) except Exception as e: print >> sys.stderr, 'admin_socket: {0}'.format(e) return 0 @@ -520,7 +508,7 @@ def main(): if len(childargs) > 2: if childargs[1].find('/') >= 0: try: - print admin_socket(childargs[1], childargs[2:]) + print admin_socket(childargs[1], childargs[2:], format) except Exception as e: print >> sys.stderr, 'admin_socket: {0}'.format(e) return 0 @@ -528,7 +516,7 @@ def main(): # try resolve daemon name path = ceph_conf('admin_socket', childargs[1]) try: - print admin_socket(path, childargs[2:]) + print admin_socket(path, childargs[2:], format) except Exception as e: print >> sys.stderr, 'admin_socket: {0}'.format(e) return 0 @@ -583,8 +571,8 @@ def main(): format(e.__class__.__name__) return 1 - if parsed_args.help or parsed_args.help_all: - return do_help(parser, childargs, parsed_args.help_all) + if parsed_args.help: + return do_help(parser, childargs) # implement -w/--watch_* # This is ugly, but Namespace() isn't quite rich enough. diff --git a/src/ceph_authtool.cc b/src/ceph_authtool.cc index 3075d9c69a7..f66a3c66eee 100644 --- a/src/ceph_authtool.cc +++ b/src/ceph_authtool.cc @@ -36,9 +36,15 @@ void usage() << " 'mount -o secret=..' argument\n" << " -C, --create-keyring will create a new keyring, overwriting any\n" << " existing keyringfile\n" - << " --gen-key will generate a new secret key for the\n" + << " -g, --gen-key will generate a new secret key for the\n" << " specified entityname\n" - << " --add-key will add an encoded key to the keyring\n" + << " --gen-print-key will generate a new secret key without set it\n" + << " to the keyringfile, prints the secret to stdout\n" + << " --import-keyring will import the content of a given keyring\n" + << " into the keyringfile\n" + << " -u, --set-uid sets the auid (authenticated user id) for the\n" + << " specified entityname\n" + << " -a, --add-key will add an encoded key to the keyring\n" << " --cap subsystem capability will set the capability for given subsystem\n" << " --caps capsfile will set all of capabilities associated with a\n" << " given key, for all subsystems" diff --git a/src/client/Client.cc b/src/client/Client.cc index eb7502c1530..5a9c5fdafcc 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -102,22 +102,24 @@ Client::CommandHook::CommandHook(Client *client) : { } -bool Client::CommandHook::call(std::string command, std::string args, bufferlist& out) +bool Client::CommandHook::call(std::string command, std::string args, + std::string format, bufferlist& out) { stringstream ss; - JSONFormatter formatter(true); + Formatter *f = new_formatter(format); m_client->client_lock.Lock(); if (command == "mds_requests") - m_client->dump_mds_requests(&formatter); + m_client->dump_mds_requests(f); else if (command == "mds_sessions") - m_client->dump_mds_sessions(&formatter); + m_client->dump_mds_sessions(f); else if (command == "dump_cache") - m_client->dump_cache(&formatter); + m_client->dump_cache(f); else assert(0 == "bad command registered"); m_client->client_lock.Unlock(); - formatter.flush(ss); + f->flush(ss); out.append(ss); + delete f; return true; } @@ -1303,7 +1305,7 @@ int Client::make_request(MetaRequest *request, // wait if (session->state == MetaSession::STATE_OPENING) { ldout(cct, 10) << "waiting for session to mds." << mds << " to open" << dendl; - wait_on_list(session->waiting_for_open); + wait_on_context_list(session->waiting_for_open); continue; } @@ -1522,7 +1524,7 @@ void Client::_closed_mds_session(MetaSession *s) { s->state = MetaSession::STATE_CLOSED; messenger->mark_down(s->con); - signal_cond_list(s->waiting_for_open); + signal_context_list(s->waiting_for_open); mount_cond.Signal(); remove_session_caps(s); kick_requests(s, true); @@ -1549,7 +1551,7 @@ void Client::handle_client_session(MClientSession *m) if (!unmounting) { connect_mds_targets(from); } - signal_cond_list(session->waiting_for_open); + signal_context_list(session->waiting_for_open); break; case CEPH_SESSION_CLOSE: @@ -1900,7 +1902,7 @@ void Client::handle_mds_map(MMDSMap* m) if (oldstate < MDSMap::STATE_ACTIVE) { kick_requests(p->second, false); kick_flushing_caps(p->second); - signal_cond_list(p->second->waiting_for_open); + signal_context_list(p->second->waiting_for_open); kick_maxsize_requests(p->second); wake_inode_waiters(p->second); } @@ -2607,6 +2609,24 @@ void Client::signal_cond_list(list<Cond*>& ls) (*it)->Signal(); } +void Client::wait_on_context_list(list<Context*>& ls) +{ + Cond cond; + bool done = false; + int r; + ls.push_back(new C_Cond(&cond, &done, &r)); + while (!done) + cond.Wait(client_lock); +} + +void Client::signal_context_list(list<Context*>& ls) +{ + while (!ls.empty()) { + ls.front()->complete(0); + ls.pop_front(); + } +} + void Client::wake_inode_waiters(MetaSession *s) { xlist<Cap*>::iterator iter = s->caps.begin(); @@ -7915,7 +7935,7 @@ void Client::ms_handle_remote_reset(Connection *con) case MetaSession::STATE_OPENING: { ldout(cct, 1) << "reset from mds we were opening; retrying" << dendl; - list<Cond*> waiters; + list<Context*> waiters; waiters.swap(s->waiting_for_open); _closed_mds_session(s); MetaSession *news = _get_or_open_mds_session(mds); diff --git a/src/client/Client.h b/src/client/Client.h index 96e8937f287..bc1fbc0401b 100644 --- a/src/client/Client.h +++ b/src/client/Client.h @@ -196,7 +196,8 @@ class Client : public Dispatcher { Client *m_client; public: CommandHook(Client *client); - bool call(std::string command, std::string args, bufferlist& out); + bool call(std::string command, std::string args, std::string format, + bufferlist& out); }; CommandHook m_command_hook; @@ -344,6 +345,9 @@ protected: void wait_on_list(list<Cond*>& ls); void signal_cond_list(list<Cond*>& ls); + void wait_on_context_list(list<Context*>& ls); + void signal_context_list(list<Context*>& ls); + // -- metadata cache stuff // decrease inode ref. delete if dangling. diff --git a/src/client/MetaSession.h b/src/client/MetaSession.h index 1f6a1240617..01575efb6ba 100644 --- a/src/client/MetaSession.h +++ b/src/client/MetaSession.h @@ -35,7 +35,7 @@ struct MetaSession { STATE_CLOSED, } state; - list<Cond*> waiting_for_open; + list<Context*> waiting_for_open; xlist<Cap*> caps; xlist<Inode*> flushing_caps; diff --git a/src/client/fuse_ll.cc b/src/client/fuse_ll.cc index ce0c6de1260..0812c9a3728 100644 --- a/src/client/fuse_ll.cc +++ b/src/client/fuse_ll.cc @@ -509,6 +509,7 @@ static void fuse_ll_statfs(fuse_req_t req, fuse_ino_t ino) fuse_reply_err(req, -r); } +#if 0 static int getgroups_cb(void *handle, uid_t uid, gid_t **sgids) { #ifdef HAVE_FUSE_GETGROUPS @@ -534,6 +535,7 @@ static int getgroups_cb(void *handle, uid_t uid, gid_t **sgids) #endif return 0; } +#endif static void invalidate_cb(void *handle, vinodeno_t vino, int64_t off, int64_t len) { @@ -702,8 +704,20 @@ int CephFuse::Handle::init(int argc, const char *argv[]) fuse_session_add_chan(se, ch); + /* + * this is broken: + * + * - the cb needs the request handle to be useful; we should get the + * gids in the method here in fuse_ll.c and pass the gid list in, + * not use a callback. + * - the callback mallocs the list but it is not free()'d + * + * so disable it for now... + client->ll_register_getgroups_cb(getgroups_cb, this); + */ + if (g_conf->fuse_use_invalidate_cb) client->ll_register_ino_invalidate_cb(invalidate_cb, this); diff --git a/src/cls/rgw/cls_rgw.cc b/src/cls/rgw/cls_rgw.cc index de2abe5665b..6cda4cba5c3 100644 --- a/src/cls/rgw/cls_rgw.cc +++ b/src/cls/rgw/cls_rgw.cc @@ -815,7 +815,7 @@ static int bi_log_iterate_entries(cls_method_context_t hctx, const string& marke map<string, bufferlist> keys; string filter_prefix, end_key; bufferlist start_bl; - bool start_key_added; + bool start_key_added = false; uint32_t i = 0; string key; diff --git a/src/common/Formatter.cc b/src/common/Formatter.cc index 357b287fe32..c08ea5b9a20 100644 --- a/src/common/Formatter.cc +++ b/src/common/Formatter.cc @@ -62,15 +62,19 @@ Formatter::~Formatter() } Formatter * -new_formatter(const std::string &type) +new_formatter(const std::string type) { - if (type == "json") + std::string mytype = type; + if (mytype == "") + mytype = "json-pretty"; + + if (mytype == "json") return new JSONFormatter(false); - else if (type == "json-pretty") + else if (mytype == "json-pretty") return new JSONFormatter(true); - else if (type == "xml") + else if (mytype == "xml") return new XMLFormatter(false); - else if (type == "xml-pretty") + else if (mytype == "xml-pretty") return new XMLFormatter(true); else return (Formatter *)NULL; @@ -250,6 +254,18 @@ void JSONFormatter::dump_format(const char *name, const char *fmt, ...) print_quoted_string(buf); } +void JSONFormatter::dump_format_unquoted(const char *name, const char *fmt, ...) +{ + char buf[LARGE_SIZE]; + va_list ap; + va_start(ap, fmt); + vsnprintf(buf, LARGE_SIZE, fmt, ap); + va_end(ap); + + print_name(name); + m_ss << buf; +} + int JSONFormatter::get_len() const { return m_ss.str().size(); @@ -400,6 +416,21 @@ void XMLFormatter::dump_format(const char *name, const char *fmt, ...) m_ss << "\n"; } +void XMLFormatter::dump_format_unquoted(const char *name, const char *fmt, ...) +{ + char buf[LARGE_SIZE]; + va_list ap; + va_start(ap, fmt); + vsnprintf(buf, LARGE_SIZE, fmt, ap); + va_end(ap); + + std::string e(name); + print_spaces(); + m_ss << "<" << e << ">" << buf << "</" << e << ">"; + if (m_pretty) + m_ss << "\n"; +} + int XMLFormatter::get_len() const { return m_ss.str().size(); diff --git a/src/common/Formatter.h b/src/common/Formatter.h index 8775c0cf9df..da730103f41 100644 --- a/src/common/Formatter.h +++ b/src/common/Formatter.h @@ -45,6 +45,7 @@ class Formatter { virtual void dump_string(const char *name, std::string s) = 0; virtual std::ostream& dump_stream(const char *name) = 0; virtual void dump_format(const char *name, const char *fmt, ...) = 0; + virtual void dump_format_unquoted(const char *name, const char *fmt, ...) = 0; virtual int get_len() const = 0; virtual void write_raw_data(const char *data) = 0; @@ -60,7 +61,7 @@ class Formatter { } }; -Formatter *new_formatter(const std::string &type); +Formatter *new_formatter(const std::string type); class JSONFormatter : public Formatter { public: @@ -79,6 +80,7 @@ class JSONFormatter : public Formatter { void dump_string(const char *name, std::string s); std::ostream& dump_stream(const char *name); void dump_format(const char *name, const char *fmt, ...); + void dump_format_unquoted(const char *name, const char *fmt, ...); int get_len() const; void write_raw_data(const char *data); @@ -119,6 +121,7 @@ class XMLFormatter : public Formatter { void dump_string(const char *name, std::string s); std::ostream& dump_stream(const char *name); void dump_format(const char *name, const char *fmt, ...); + void dump_format_unquoted(const char *name, const char *fmt, ...); int get_len() const; void write_raw_data(const char *data); diff --git a/src/common/admin_socket.cc b/src/common/admin_socket.cc index f7ab3501dff..e73f3ce0a0c 100644 --- a/src/common/admin_socket.cc +++ b/src/common/admin_socket.cc @@ -310,6 +310,18 @@ bool AdminSocket::do_accept() bool rval = false; + map<string, cmd_vartype> cmdmap; + string format; + vector<string> cmdvec; + stringstream errss; + cmdvec.push_back(cmd); + if (!cmdmap_from_json(cmdvec, &cmdmap, errss)) { + ldout(m_cct, 0) << "AdminSocket: " << errss << dendl; + return false; + } + cmd_getval(m_cct, cmdmap, "format", format); + cmd_getval(m_cct, cmdmap, "prefix", c); + string firstword; if (c.find(" ") == string::npos) firstword = c; @@ -341,7 +353,7 @@ bool AdminSocket::do_accept() string args; if (match != c) args = c.substr(match.length() + 1); - bool success = p->second->call(match, args, out); + bool success = p->second->call(match, args, format, out); if (!success) { ldout(m_cct, 0) << "AdminSocket: request '" << match << "' args '" << args << "' to " << p->second << " failed" << dendl; @@ -378,8 +390,7 @@ int AdminSocket::register_command(std::string command, std::string cmddesc, Admi ldout(m_cct, 5) << "register_command " << command << " hook " << hook << dendl; m_hooks[command] = hook; m_descs[command] = cmddesc; - if (help.length()) - m_help[command] = help; + m_help[command] = help; ret = 0; } m_lock.Unlock(); @@ -406,7 +417,8 @@ int AdminSocket::unregister_command(std::string command) class VersionHook : public AdminSocketHook { public: - virtual bool call(std::string command, std::string args, bufferlist& out) { + virtual bool call(std::string command, std::string args, std::string format, + bufferlist& out) { if (command == "0") { out.append(CEPH_ADMIN_SOCK_VERSION); } else { @@ -429,18 +441,20 @@ class HelpHook : public AdminSocketHook { AdminSocket *m_as; public: HelpHook(AdminSocket *as) : m_as(as) {} - bool call(string command, string args, bufferlist& out) { - JSONFormatter jf(true); - jf.open_object_section("help"); + bool call(string command, string args, string format, bufferlist& out) { + Formatter *f = new_formatter(format); + f->open_object_section("help"); for (map<string,string>::iterator p = m_as->m_help.begin(); p != m_as->m_help.end(); ++p) { - jf.dump_string(p->first.c_str(), p->second); + if (p->second.length()) + f->dump_string(p->first.c_str(), p->second); } - jf.close_section(); + f->close_section(); ostringstream ss; - jf.flush(ss); + f->flush(ss); out.append(ss.str()); + delete f; return true; } }; @@ -449,7 +463,7 @@ class GetdescsHook : public AdminSocketHook { AdminSocket *m_as; public: GetdescsHook(AdminSocket *as) : m_as(as) {} - bool call(string command, string args, bufferlist& out) { + bool call(string command, string args, string format, bufferlist& out) { int cmdnum = 0; JSONFormatter jf(false); jf.open_object_section("command_descriptions"); diff --git a/src/common/admin_socket.h b/src/common/admin_socket.h index c390bca0382..30c5eb96ab8 100644 --- a/src/common/admin_socket.h +++ b/src/common/admin_socket.h @@ -29,7 +29,8 @@ class CephContext; class AdminSocketHook { public: - virtual bool call(std::string command, std::string args, bufferlist& out) = 0; + virtual bool call(std::string command, std::string args, std::string format, + bufferlist& out) = 0; virtual ~AdminSocketHook() {}; }; diff --git a/src/common/ceph_context.cc b/src/common/ceph_context.cc index cad980bb2a6..6b227d8689e 100644 --- a/src/common/ceph_context.cc +++ b/src/common/ceph_context.cc @@ -156,44 +156,46 @@ class CephContextHook : public AdminSocketHook { public: CephContextHook(CephContext *cct) : m_cct(cct) {} - bool call(std::string command, std::string args, bufferlist& out) { - m_cct->do_command(command, args, &out); + bool call(std::string command, std::string args, std::string format, + bufferlist& out) { + m_cct->do_command(command, args, format, &out); return true; } }; -void CephContext::do_command(std::string command, std::string args, bufferlist *out) +void CephContext::do_command(std::string command, std::string args, + std::string format, bufferlist *out) { + Formatter *f = new_formatter(format); lgeneric_dout(this, 1) << "do_command '" << command << "' '" << args << "'" << dendl; if (command == "perfcounters_dump" || command == "1" || command == "perf dump") { - _perf_counters_collection->write_json_to_buf(*out, false); + _perf_counters_collection->dump_formatted(f, false); } else if (command == "perfcounters_schema" || command == "2" || command == "perf schema") { - _perf_counters_collection->write_json_to_buf(*out, true); + _perf_counters_collection->dump_formatted(f, true); } else { - JSONFormatter jf(true); - jf.open_object_section(command.c_str()); + f->open_object_section(command.c_str()); if (command == "config show") { - _conf->show_config(&jf); + _conf->show_config(f); } else if (command == "config set") { std::string var = args; size_t pos = var.find(' '); if (pos == string::npos) { - jf.dump_string("error", "syntax error: 'config set <var> <value>'"); + f->dump_string("error", "syntax error: 'config set <var> <value>'"); } else { std::string val = var.substr(pos+1); var.resize(pos); int r = _conf->set_val(var.c_str(), val.c_str()); if (r < 0) { - jf.dump_stream("error") << "error setting '" << var << "' to '" << val << "': " << cpp_strerror(r); + f->dump_stream("error") << "error setting '" << var << "' to '" << val << "': " << cpp_strerror(r); } else { ostringstream ss; _conf->apply_changes(&ss); - jf.dump_string("success", ss.str()); + f->dump_string("success", ss.str()); } } } else if (command == "config get") { @@ -202,9 +204,9 @@ void CephContext::do_command(std::string command, std::string args, bufferlist * char *tmp = buf; int r = _conf->get_val(args.c_str(), &tmp, sizeof(buf)); if (r < 0) { - jf.dump_stream("error") << "error getting '" << args << "': " << cpp_strerror(r); + f->dump_stream("error") << "error getting '" << args << "': " << cpp_strerror(r); } else { - jf.dump_string(args.c_str(), buf); + f->dump_string(args.c_str(), buf); } } else if (command == "log flush") { _log->flush(); @@ -218,11 +220,10 @@ void CephContext::do_command(std::string command, std::string args, bufferlist * else { assert(0 == "registered under wrong command?"); } - ostringstream ss; - jf.close_section(); - jf.flush(ss); - out->append(ss.str()); + f->close_section(); } + f->flush(*out); + delete f; lgeneric_dout(this, 1) << "do_command '" << command << "' '" << args << "' result is " << out->length() << " bytes" << dendl; }; diff --git a/src/common/ceph_context.h b/src/common/ceph_context.h index 1678680fa6d..85618e35219 100644 --- a/src/common/ceph_context.h +++ b/src/common/ceph_context.h @@ -97,7 +97,8 @@ public: /** * process an admin socket command */ - void do_command(std::string command, std::string args, bufferlist *out); + void do_command(std::string command, std::string args, std::string foramt, + bufferlist *out); /** * get a crypto handler diff --git a/src/common/config_opts.h b/src/common/config_opts.h index b43808e211c..3b9d025393f 100644 --- a/src/common/config_opts.h +++ b/src/common/config_opts.h @@ -443,6 +443,7 @@ OPTION(osd_deep_scrub_stride, OPT_INT, 524288) OPTION(osd_scan_list_ping_tp_interval, OPT_U64, 100) OPTION(osd_auto_weight, OPT_BOOL, false) OPTION(osd_class_dir, OPT_STR, CEPH_LIBDIR "/rados-classes") // where rados plugins are stored +OPTION(osd_open_classes_on_start, OPT_BOOL, true) OPTION(osd_check_for_log_corruption, OPT_BOOL, false) OPTION(osd_use_stale_snap, OPT_BOOL, false) OPTION(osd_rollback_to_cluster_snap, OPT_STR, "") @@ -501,20 +502,20 @@ OPTION(osd_max_attr_size, OPT_U64, 65536) OPTION(filestore, OPT_BOOL, false) /// filestore wb throttle limits -OPTION(filestore_wbthrottle_btrfs_bytes_start_flusher, OPT_U64, 10<<20) -OPTION(filestore_wbthrottle_btrfs_bytes_hard_limit, OPT_U64, 100<<20) -OPTION(filestore_wbthrottle_btrfs_ios_start_flusher, OPT_U64, 100) -OPTION(filestore_wbthrottle_btrfs_ios_hard_limit, OPT_U64, 1000) -OPTION(filestore_wbthrottle_btrfs_inodes_start_flusher, OPT_U64, 100) -OPTION(filestore_wbthrottle_xfs_bytes_start_flusher, OPT_U64, 10<<20) -OPTION(filestore_wbthrottle_xfs_bytes_hard_limit, OPT_U64, 100<<20) -OPTION(filestore_wbthrottle_xfs_ios_start_flusher, OPT_U64, 10) -OPTION(filestore_wbthrottle_xfs_ios_hard_limit, OPT_U64, 100) -OPTION(filestore_wbthrottle_xfs_inodes_start_flusher, OPT_U64, 10) +OPTION(filestore_wbthrottle_btrfs_bytes_start_flusher, OPT_U64, 41943040) +OPTION(filestore_wbthrottle_btrfs_bytes_hard_limit, OPT_U64, 419430400) +OPTION(filestore_wbthrottle_btrfs_ios_start_flusher, OPT_U64, 500) +OPTION(filestore_wbthrottle_btrfs_ios_hard_limit, OPT_U64, 5000) +OPTION(filestore_wbthrottle_btrfs_inodes_start_flusher, OPT_U64, 500) +OPTION(filestore_wbthrottle_xfs_bytes_start_flusher, OPT_U64, 41943040) +OPTION(filestore_wbthrottle_xfs_bytes_hard_limit, OPT_U64, 419430400) +OPTION(filestore_wbthrottle_xfs_ios_start_flusher, OPT_U64, 500) +OPTION(filestore_wbthrottle_xfs_ios_hard_limit, OPT_U64, 5000) +OPTION(filestore_wbthrottle_xfs_inodes_start_flusher, OPT_U64, 500) /// These must be less than the fd limit -OPTION(filestore_wbthrottle_btrfs_inodes_hard_limit, OPT_U64, 256) -OPTION(filestore_wbthrottle_xfs_inodes_hard_limit, OPT_U64, 100) +OPTION(filestore_wbthrottle_btrfs_inodes_hard_limit, OPT_U64, 5000) +OPTION(filestore_wbthrottle_xfs_inodes_hard_limit, OPT_U64, 5000) // Tests index failure paths OPTION(filestore_index_retry_probability, OPT_DOUBLE, 0) diff --git a/src/common/fiemap.cc b/src/common/fiemap.cc deleted file mode 100644 index a1d5fbe9396..00000000000 --- a/src/common/fiemap.cc +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright (C) 2010 Canonical - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * - */ - -/* - * Author Colin Ian King, colin.king@canonical.com - */ - -#include <stdlib.h> -#include <stdio.h> -#include <string.h> -#include <unistd.h> -#include <fcntl.h> - -#include <sys/types.h> -#include <sys/stat.h> -#include <sys/ioctl.h> - -#if defined(__linux__) -#include <linux/fs.h> -#endif -#include "include/inttypes.h" -#include "include/fiemap.h" - -struct fiemap *read_fiemap(int fd) -{ - struct fiemap *fiemap; - struct fiemap *_realloc_fiemap = NULL; - int extents_size; - int r; - - if ((fiemap = (struct fiemap*)malloc(sizeof(struct fiemap))) == NULL) { - fprintf(stderr, "Out of memory allocating fiemap\n"); - return NULL; - } - memset(fiemap, 0, sizeof(struct fiemap)); - - fiemap->fm_start = 0; - fiemap->fm_length = ~0; /* Lazy */ - fiemap->fm_flags = 0; - fiemap->fm_extent_count = 0; - fiemap->fm_mapped_extents = 0; - - /* Find out how many extents there are */ - r = ioctl(fd, FS_IOC_FIEMAP, fiemap); - if (r < 0) { - goto done_err; - } - - if (!fiemap->fm_mapped_extents) { - goto done_err; - } - - /* Read in the extents */ - extents_size = sizeof(struct fiemap_extent) * (fiemap->fm_mapped_extents); - - /* Resize fiemap to allow us to read in the extents */ - - if ((_realloc_fiemap = (struct fiemap*)realloc(fiemap,sizeof(struct fiemap) + - extents_size)) == NULL) { - fprintf(stderr, "Out of memory allocating fiemap\n"); - goto done_err; - } else { - fiemap = _realloc_fiemap; - } - - memset(fiemap->fm_extents, 0, extents_size); - fiemap->fm_extent_count = fiemap->fm_mapped_extents; - fiemap->fm_mapped_extents = 0; - - if (ioctl(fd, FS_IOC_FIEMAP, fiemap) < 0) { - fprintf(stderr, "fiemap ioctl() failed\n"); - goto done_err; - } - - return fiemap; -done_err: - free(fiemap); - return NULL; -} - diff --git a/src/common/perf_counters.cc b/src/common/perf_counters.cc index 67a777497b3..1dd4cdabd9d 100644 --- a/src/common/perf_counters.cc +++ b/src/common/perf_counters.cc @@ -15,6 +15,7 @@ #include "common/perf_counters.h" #include "common/dout.h" #include "common/errno.h" +#include "common/Formatter.h" #include <errno.h> #include <inttypes.h> @@ -72,21 +73,20 @@ void PerfCountersCollection::clear() } } -void PerfCountersCollection::write_json_to_buf(bufferlist& bl, bool schema) +void PerfCountersCollection::dump_formatted(Formatter *f, bool schema) { Mutex::Locker lck(m_lock); - bl.append('{'); + f->open_object_section("perfcounter_collection"); perf_counters_set_t::iterator l = m_loggers.begin(); perf_counters_set_t::iterator l_end = m_loggers.end(); if (l != l_end) { while (true) { - (*l)->write_json_to_buf(bl, schema); + (*l)->dump_formatted(f, schema); if (++l == l_end) break; - bl.append(','); } } - bl.append('}'); + f->close_section(); } // --------------------------- @@ -203,34 +203,54 @@ utime_t PerfCounters::tget(int idx) const return utime_t(data.u64 / 1000000000ull, data.u64 % 1000000000ull); } -void PerfCounters::write_json_to_buf(bufferlist& bl, bool schema) +void PerfCounters::dump_formatted(Formatter *f, bool schema) { - char buf[512]; Mutex::Locker lck(m_lock); - snprintf(buf, sizeof(buf), "\"%s\":{", m_name.c_str()); - bl.append(buf); - + f->open_object_section(m_name.c_str()); perf_counter_data_vec_t::const_iterator d = m_data.begin(); perf_counter_data_vec_t::const_iterator d_end = m_data.end(); if (d == d_end) { - bl.append('}'); + f->close_section(); return; } while (true) { - const perf_counter_data_any_d &data(*d); - buf[0] = '\0'; - if (schema) - data.write_schema_json(buf, sizeof(buf)); - else - data.write_json(buf, sizeof(buf)); - - bl.append(buf); + if (schema) { + f->open_object_section(d->name); + f->dump_int("type", d->type); + f->close_section(); + } else { + if (d->type & PERFCOUNTER_LONGRUNAVG) { + f->open_object_section(d->name); + if (d->type & PERFCOUNTER_U64) { + f->dump_unsigned("avgcount", d->avgcount); + f->dump_unsigned("sum", d->u64); + } else if (d->type & PERFCOUNTER_TIME) { + f->dump_unsigned("avgcount", d->avgcount); + f->dump_format_unquoted("sum", "%"PRId64".%09"PRId64, + d->u64 / 1000000000ull, + d->u64 % 1000000000ull); + } else { + assert(0); + } + f->close_section(); + } else { + if (d->type & PERFCOUNTER_U64) { + f->dump_unsigned(d->name, d->u64); + } else if (d->type & PERFCOUNTER_TIME) { + f->dump_format_unquoted(d->name, "%"PRId64".%09"PRId64, + d->u64 / 1000000000ull, + d->u64 % 1000000000ull); + } else { + assert(0); + } + } + } + if (++d == d_end) break; - bl.append(','); } - bl.append('}'); + f->close_section(); } const std::string &PerfCounters::get_name() const @@ -258,42 +278,6 @@ PerfCounters::perf_counter_data_any_d::perf_counter_data_any_d() { } -void PerfCounters::perf_counter_data_any_d::write_schema_json(char *buf, size_t buf_sz) const -{ - snprintf(buf, buf_sz, "\"%s\":{\"type\":%d}", name, type); -} - -void PerfCounters::perf_counter_data_any_d::write_json(char *buf, size_t buf_sz) const -{ - if (type & PERFCOUNTER_LONGRUNAVG) { - if (type & PERFCOUNTER_U64) { - snprintf(buf, buf_sz, "\"%s\":{\"avgcount\":%" PRId64 "," - "\"sum\":%" PRId64 "}", - name, avgcount, u64); - } - else if (type & PERFCOUNTER_TIME) { - snprintf(buf, buf_sz, "\"%s\":{\"avgcount\":%" PRId64 "," - "\"sum\":%llu.%09llu}", - name, avgcount, u64 / 1000000000ull, u64 % 1000000000ull); - } - else { - assert(0); - } - } - else { - if (type & PERFCOUNTER_U64) { - snprintf(buf, buf_sz, "\"%s\":%" PRId64, - name, u64); - } - else if (type & PERFCOUNTER_TIME) { - snprintf(buf, buf_sz, "\"%s\":%llu.%09llu", name, u64 / 1000000000ull, u64 % 1000000000ull); - } - else { - assert(0); - } - } -} - PerfCountersBuilder::PerfCountersBuilder(CephContext *cct, const std::string &name, int first, int last) : m_perf_counters(new PerfCounters(cct, name, first, last)) diff --git a/src/common/perf_counters.h b/src/common/perf_counters.h index 269a32f2c46..ec10f9a9282 100644 --- a/src/common/perf_counters.h +++ b/src/common/perf_counters.h @@ -76,7 +76,7 @@ public: void tinc(int idx, utime_t v); utime_t tget(int idx) const; - void write_json_to_buf(ceph::bufferlist& bl, bool schema); + void dump_formatted(ceph::Formatter *f, bool schema); const std::string& get_name() const; void set_name(std::string s) { @@ -136,7 +136,7 @@ public: void add(class PerfCounters *l); void remove(class PerfCounters *l); void clear(); - void write_json_to_buf(ceph::bufferlist& bl, bool schema); + void dump_formatted(ceph::Formatter *f, bool schema); private: CephContext *m_cct; diff --git a/src/common/sharedptr_registry.hpp b/src/common/sharedptr_registry.hpp index 8669d063a79..a62aa0d9ce3 100644 --- a/src/common/sharedptr_registry.hpp +++ b/src/common/sharedptr_registry.hpp @@ -29,6 +29,7 @@ class SharedPtrRegistry { public: typedef std::tr1::shared_ptr<V> VPtr; typedef std::tr1::weak_ptr<V> WeakVPtr; + int waiting; private: Mutex lock; Cond cond; @@ -52,7 +53,10 @@ private: friend class OnRemoval; public: - SharedPtrRegistry() : lock("SharedPtrRegistry::lock") {} + SharedPtrRegistry() : + waiting(0), + lock("SharedPtrRegistry::lock") + {} bool get_next(const K &key, pair<K, V> *next) { VPtr next_val; @@ -70,26 +74,33 @@ public: VPtr lookup(const K &key) { Mutex::Locker l(lock); + waiting++; while (1) { if (contents.count(key)) { VPtr retval = contents[key].lock(); - if (retval) + if (retval) { + waiting--; return retval; + } } else { break; } cond.Wait(lock); } + waiting--; return VPtr(); } VPtr lookup_or_create(const K &key) { Mutex::Locker l(lock); + waiting++; while (1) { if (contents.count(key)) { VPtr retval = contents[key].lock(); - if (retval) + if (retval) { + waiting--; return retval; + } } else { break; } @@ -97,6 +108,7 @@ public: } VPtr retval(new V(), OnRemoval(this, key)); contents[key] = retval; + waiting--; return retval; } @@ -109,11 +121,14 @@ public: template<class A> VPtr lookup_or_create(const K &key, const A &arg) { Mutex::Locker l(lock); + waiting++; while (1) { if (contents.count(key)) { VPtr retval = contents[key].lock(); - if (retval) + if (retval) { + waiting--; return retval; + } } else { break; } @@ -121,8 +136,11 @@ public: } VPtr retval(new V(arg), OnRemoval(this, key)); contents[key] = retval; + waiting--; return retval; } + + friend class SharedPtrRegistryTest; }; #endif diff --git a/src/global/signal_handler.cc b/src/global/signal_handler.cc index 25f1a0a1992..ce604fe1e5d 100644 --- a/src/global/signal_handler.cc +++ b/src/global/signal_handler.cc @@ -19,6 +19,7 @@ #include "global/pidfile.h" #include "global/signal_handler.h" +#include <poll.h> #include <signal.h> #include <sstream> #include <stdlib.h> @@ -189,25 +190,27 @@ struct SignalHandler : public Thread { // thread entry point void *entry() { while (!stop) { - // create fd set - fd_set rfds; - FD_ZERO(&rfds); - FD_SET(pipefd[0], &rfds); - int max_fd = pipefd[0]; + // build fd list + struct pollfd fds[33]; lock.Lock(); + int num_fds = 0; + fds[num_fds].fd = pipefd[0]; + fds[num_fds].events = POLLIN | POLLOUT | POLLERR; + fds[num_fds].revents = 0; + ++num_fds; for (unsigned i=0; i<32; i++) { if (handlers[i]) { - int fd = handlers[i]->pipefd[0]; - FD_SET(fd, &rfds); - if (fd > max_fd) - max_fd = fd; + fds[num_fds].fd = handlers[i]->pipefd[0]; + fds[num_fds].events = POLLIN | POLLOUT | POLLERR; + fds[num_fds].revents = 0; + ++num_fds; } } lock.Unlock(); // wait for data on any of those pipes - int r = select(max_fd + 1, &rfds, NULL, NULL, NULL); + int r = poll(fds, num_fds, -1); if (stop) break; if (r > 0) { diff --git a/src/include/fiemap.h b/src/include/fiemap.h deleted file mode 100644 index 846adb155ff..00000000000 --- a/src/include/fiemap.h +++ /dev/null @@ -1,27 +0,0 @@ -#ifndef __CEPH_FIEMAP_H -#define __CEPH_FIEMAP_H - -#include "acconfig.h" - -/* - * the header is missing on most systems. for the time being at - * least, include our own copy in the repo. - */ -#ifdef HAVE_FIEMAP_H -# include <linux/fiemap.h> -#else -# include "linux_fiemap.h" -#endif - -#if defined(__linux__) -#include <linux/ioctl.h> -#elif defined(__FreeBSD__) -#include <sys/ioctl.h> -#endif -#ifndef FS_IOC_FIEMAP -# define FS_IOC_FIEMAP _IOWR('f', 11, struct fiemap) -#endif - -extern "C" struct fiemap *read_fiemap(int fd); - -#endif diff --git a/src/init-ceph.in b/src/init-ceph.in index b0ed353f8e7..7d003e6370c 100644 --- a/src/init-ceph.in +++ b/src/init-ceph.in @@ -27,7 +27,7 @@ else fi usage_exit() { - echo "usage: $0 [options] {start|stop|restart} [mon|osd|mds]..." + echo "usage: $0 [options] {start|stop|restart|condrestart} [mon|osd|mds]..." printf "\t-c ceph.conf\n" printf "\t--valgrind\trun via valgrind\n" printf "\t--hostname [hostname]\toverride hostname lookup\n" @@ -241,7 +241,7 @@ for name in $what; do case "$command" in start) # Increase max_open_files, if the configuration calls for it. - get_conf max_open_files "8192" "max open files" + get_conf max_open_files "32768" "max open files" # build final command wrap="" @@ -414,6 +414,15 @@ for name in $what; do $0 $options start $name ;; + condrestart) + if daemon_is_running $name ceph-$type $id $pid_file; then + $0 $options stop $name + $0 $options start $name + else + echo "$name: not running." + fi + ;; + cleanlogs) echo removing logs [ -n "$log_dir" ] && do_cmd "rm -f $log_dir/$type.$id.*" diff --git a/src/librados/RadosClient.cc b/src/librados/RadosClient.cc index f68125fb8c0..e8dd019af3a 100644 --- a/src/librados/RadosClient.cc +++ b/src/librados/RadosClient.cc @@ -642,6 +642,10 @@ int librados::RadosClient::osd_command(int osd, vector<string>& cmd, bool done; int ret; tid_t tid; + + if (osd < 0) + return -EINVAL; + lock.Lock(); // XXX do anything with tid? int r = objecter->osd_command(osd, cmd, inbl, &tid, poutbl, prs, diff --git a/src/messages/MMonScrub.h b/src/messages/MMonScrub.h index ab4588f4a76..b16728bcdd5 100644 --- a/src/messages/MMonScrub.h +++ b/src/messages/MMonScrub.h @@ -31,7 +31,7 @@ public: switch (op) { case OP_SCRUB: return "scrub"; case OP_RESULT: return "result"; - default: assert("unknown op type"); return NULL; + default: assert(0 == "unknown op type"); return NULL; } } diff --git a/src/messages/MMonSync.h b/src/messages/MMonSync.h index a5415a8f451..48229d15bcc 100644 --- a/src/messages/MMonSync.h +++ b/src/messages/MMonSync.h @@ -49,7 +49,7 @@ public: case OP_CHUNK: return "chunk"; case OP_LAST_CHUNK: return "last_chunk"; case OP_NO_COOKIE: return "no_cookie"; - default: assert("unknown op type"); return NULL; + default: assert(0 == "unknown op type"); return NULL; } } diff --git a/src/mon/DataHealthService.cc b/src/mon/DataHealthService.cc index a55e8c392e2..6e8aa313a36 100644 --- a/src/mon/DataHealthService.cc +++ b/src/mon/DataHealthService.cc @@ -93,7 +93,7 @@ health_status_t DataHealthService::get_health( } if (f) { - f->open_object_section(mon_name.c_str()); + f->open_object_section("mon"); f->dump_string("name", mon_name.c_str()); f->dump_int("kb_total", stats.kb_total); f->dump_int("kb_used", stats.kb_used); diff --git a/src/mon/MDSMonitor.cc b/src/mon/MDSMonitor.cc index 172f0f1e3db..f0fb4ae8332 100644 --- a/src/mon/MDSMonitor.cc +++ b/src/mon/MDSMonitor.cc @@ -563,7 +563,9 @@ bool MDSMonitor::preprocess_command(MMonCommand *m) if (prefix == "mds stat") { if (f) { + f->open_object_section("mds_stat"); dump_info(f.get()); + f->close_section(); f->flush(ds); } else { ds << mdsmap; diff --git a/src/mon/MonCap.cc b/src/mon/MonCap.cc index 8e35b775247..b03873ad7dd 100644 --- a/src/mon/MonCap.cc +++ b/src/mon/MonCap.cc @@ -133,6 +133,7 @@ void MonCapGrant::expand_profile(entity_name_t name) const if (profile == "mds") { profile_grants.push_back(MonCapGrant("mds", MON_CAP_ALL)); profile_grants.push_back(MonCapGrant("mon", MON_CAP_R)); + profile_grants.push_back(MonCapGrant("osd", MON_CAP_R)); profile_grants.push_back(MonCapGrant("log", MON_CAP_W)); } if (profile == "osd" || profile == "mds" || profile == "mon") { @@ -149,7 +150,7 @@ void MonCapGrant::expand_profile(entity_name_t name) const profile_grants.push_back(MonCapGrant("osd create")); profile_grants.push_back(MonCapGrant("osd crush set")); // FIXME: constraint this further? profile_grants.push_back(MonCapGrant("auth add")); - profile_grants.back().command_args["name"] = StringConstraint("", "osd."); + profile_grants.back().command_args["entity"] = StringConstraint("", "osd."); profile_grants.back().command_args["caps_mon"] = StringConstraint("allow profile osd", ""); profile_grants.back().command_args["caps_osd"] = StringConstraint("allow *", ""); } @@ -158,7 +159,7 @@ void MonCapGrant::expand_profile(entity_name_t name) const profile_grants.push_back(MonCapGrant("osd", MON_CAP_R)); // read osdmap profile_grants.push_back(MonCapGrant("mon getmap")); profile_grants.push_back(MonCapGrant("auth get-or-create")); // FIXME: this can expose other mds keys - profile_grants.back().command_args["name"] = StringConstraint("", "mds."); + profile_grants.back().command_args["entity"] = StringConstraint("", "mds."); profile_grants.back().command_args["caps_mon"] = StringConstraint("allow profile mds", ""); profile_grants.back().command_args["caps_osd"] = StringConstraint("allow rwx", ""); profile_grants.back().command_args["caps_mds"] = StringConstraint("allow", ""); diff --git a/src/mon/MonCommands.h b/src/mon/MonCommands.h index 5b950ca1aef..0980893bf9b 100644 --- a/src/mon/MonCommands.h +++ b/src/mon/MonCommands.h @@ -482,7 +482,7 @@ COMMAND("osd pool delete " \ "name=pool2,type=CephPoolname " \ "name=sure,type=CephChoices,strings=--yes-i-really-really-mean-it", \ "delete pool (say pool twice, add --yes-i-really-really-mean-it)", \ - "osd", "r", "cli,rest") + "osd", "rw", "cli,rest") COMMAND("osd pool rename " \ "name=srcpool,type=CephPoolname " \ "name=destpool,type=CephPoolname", \ @@ -494,8 +494,7 @@ COMMAND("osd pool get " \ COMMAND("osd pool set " \ "name=pool,type=CephPoolname " \ "name=var,type=CephChoices,strings=size|min_size|crash_replay_interval|pg_num|pgp_num|crush_ruleset " \ - "name=val,type=CephInt " \ - "name=sure,type=CephChoices,strings=--allow-experimental-feature,req=false", \ + "name=val,type=CephInt", \ "set pool parameter <var> to <val>", "osd", "rw", "cli,rest") // 'val' is a CephString because it can include a unit. Perhaps // there should be a Python type for validation/conversion of strings diff --git a/src/mon/Monitor.cc b/src/mon/Monitor.cc index f537c915945..119ef740aa8 100644 --- a/src/mon/Monitor.cc +++ b/src/mon/Monitor.cc @@ -143,6 +143,7 @@ Monitor::Monitor(CephContext* cct_, string nm, MonitorDBStore *s, elector(this), leader(0), quorum_features(0), + scrub_version(0), // sync state sync_provider_count(0), @@ -224,21 +225,20 @@ class AdminHook : public AdminSocketHook { Monitor *mon; public: AdminHook(Monitor *m) : mon(m) {} - bool call(std::string command, std::string args, bufferlist& out) { + bool call(std::string command, std::string args, std::string format, + bufferlist& out) { stringstream ss; - mon->do_admin_command(command, args, ss); + mon->do_admin_command(command, args, format, ss); out.append(ss); return true; } }; -void Monitor::do_admin_command(string command, string args, ostream& ss) +void Monitor::do_admin_command(string command, string args, string format, + ostream& ss) { Mutex::Locker l(lock); - map<string, cmd_vartype> cmdmap; - string format; - cmd_getval(g_ceph_context, cmdmap, "format", format, string("plain")); boost::scoped_ptr<Formatter> f(new_formatter(format)); if (command == "mon_status") @@ -1534,11 +1534,23 @@ bool Monitor::_allowed_command(MonSession *s, map<string, cmd_vartype>& cmd) map<string,string> strmap; for (map<string, cmd_vartype>::const_iterator p = cmd.begin(); p != cmd.end(); ++p) { - if (p->first != "prefix") { - strmap[p->first] = cmd_vartype_stringify(p->second); + if (p->first == "prefix") + continue; + if (p->first == "caps") { + vector<string> cv; + if (cmd_getval(g_ceph_context, cmd, "caps", cv) && + cv.size() % 2 == 0) { + for (unsigned i = 0; i < cv.size(); i += 2) { + string k = string("caps_") + cv[i]; + strmap[k] = cv[i + 1]; + } + continue; + } } + strmap[p->first] = cmd_vartype_stringify(p->second); } + dout(20) << __func__ << " strmap " << strmap << dendl; if (s->caps.is_capable(g_ceph_context, s->inst.name, "", prefix, strmap, false, false, true)) { retval = true; @@ -1589,6 +1601,14 @@ void Monitor::_quorum_status(Formatter *f, ostream& ss) f->dump_int("mon", *p); f->close_section(); // quorum + set<string> quorum_names = get_quorum_names(); + f->open_array_section("quorum_names"); + for (set<string>::iterator p = quorum_names.begin(); p != quorum_names.end(); ++p) + f->dump_string("mon", *p); + f->close_section(); // quorum_names + + f->dump_string("quorum_leader_name", quorum.empty() ? string() : monmap->get_name(*quorum.begin())); + f->open_object_section("monmap"); monmap->dump(f); f->close_section(); // monmap @@ -1832,6 +1852,7 @@ void Monitor::get_status(stringstream &ss, Formatter *f) f->open_object_section("mdsmap"); mdsmon()->mdsmap.print_summary(f, NULL); f->close_section(); + f->close_section(); } else { ss << " cluster " << monmap->get_fsid() << "\n"; ss << " health " << health << "\n"; diff --git a/src/mon/Monitor.h b/src/mon/Monitor.h index 82b08816702..bed48ecee34 100644 --- a/src/mon/Monitor.h +++ b/src/mon/Monitor.h @@ -745,7 +745,8 @@ public: int write_fsid(); int write_fsid(MonitorDBStore::Transaction &t); - void do_admin_command(std::string command, std::string args, ostream& ss); + void do_admin_command(std::string command, std::string args, + std::string format, ostream& ss); private: // don't allow copying diff --git a/src/mon/MonmapMonitor.cc b/src/mon/MonmapMonitor.cc index 195f66350d8..5ec1583b82f 100644 --- a/src/mon/MonmapMonitor.cc +++ b/src/mon/MonmapMonitor.cc @@ -322,8 +322,8 @@ bool MonmapMonitor::prepare_command(MMonCommand *m) string name; cmd_getval(g_ceph_context, cmdmap, "name", name); if (!pending_map.contains(name)) { - err = -ENOENT; - ss << "mon " << name << " does not exist"; + err = 0; + ss << "mon " << name << " does not exist or has already been removed"; goto out; } diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc index 20e4eac88cb..c6db052a591 100644 --- a/src/mon/OSDMonitor.cc +++ b/src/mon/OSDMonitor.cc @@ -125,6 +125,51 @@ void OSDMonitor::update_from_paxos(bool *need_bootstrap) version_t latest_full = get_version_latest_full(); if (latest_full == 0 && get_first_committed() > 1) latest_full = get_first_committed(); + + if (latest_full > 0) { + // make sure we can really believe get_version_latest_full(); see + // 76cd7ac1c2094b34ad36bea89b2246fa90eb2f6d + bufferlist test; + get_version_full(latest_full, test); + if (test.length() == 0) { + dout(10) << __func__ << " ignoring recorded latest_full as it is missing; fallback to search" << dendl; + latest_full = 0; + } + } + if (get_first_committed() > 1 && + latest_full < get_first_committed()) { + /* a bug introduced in 7fb3804fb860dcd0340dd3f7c39eec4315f8e4b6 would lead + * us to not update the on-disk latest_full key. Upon trim, the actual + * version would cease to exist but we would still point to it. This + * makes sure we get it pointing to a proper version. + */ + version_t lc = get_last_committed(); + version_t fc = get_first_committed(); + + dout(10) << __func__ << " looking for valid full map in interval" + << " [" << fc << ", " << lc << "]" << dendl; + + latest_full = 0; + for (version_t v = lc; v >= fc; v--) { + string full_key = "full_" + stringify(v); + if (mon->store->exists(get_service_name(), full_key)) { + dout(10) << __func__ << " found latest full map v " << v << dendl; + latest_full = v; + break; + } + } + + // if we trigger this, then there's something else going with the store + // state, and we shouldn't want to work around it without knowing what + // exactly happened. + assert(latest_full > 0); + MonitorDBStore::Transaction t; + put_version_latest_full(&t, latest_full); + mon->store->apply_transaction(t); + dout(10) << __func__ << " updated the on-disk full map version to " + << latest_full << dendl; + } + if ((latest_full > 0) && (latest_full > osdmap.epoch)) { bufferlist latest_bl; get_version_full(latest_full, latest_bl); @@ -526,17 +571,6 @@ void OSDMonitor::encode_pending(MonitorDBStore::Transaction *t) put_last_committed(t, pending_inc.epoch); } -void OSDMonitor::encode_full(MonitorDBStore::Transaction *t) -{ - dout(10) << __func__ << " osdmap e " << osdmap.epoch << dendl; - assert(get_last_committed() == osdmap.epoch); - - bufferlist osdmap_bl; - osdmap.encode(osdmap_bl); - put_version_full(t, osdmap.epoch, osdmap_bl); - put_version_latest_full(t, osdmap.epoch); -} - void OSDMonitor::share_map_with_random_osd() { if (osdmap.get_num_up_osds() == 0) { @@ -586,6 +620,7 @@ void OSDMonitor::encode_trim_extra(MonitorDBStore::Transaction *tx, version_t fi bufferlist bl; get_version_full(first, bl); put_version_full(tx, first, bl); + put_version_latest_full(tx, first); } // ------------- @@ -1930,6 +1965,8 @@ bool OSDMonitor::preprocess_command(MMonCommand *m) if (prefix == "osd stat") { osdmap.print_summary(f.get(), ds); + if (f) + f->flush(ds); rdata.append(ds); } else if (prefix == "osd dump" || diff --git a/src/mon/OSDMonitor.h b/src/mon/OSDMonitor.h index d7cb8fdf369..f66c676b53d 100644 --- a/src/mon/OSDMonitor.h +++ b/src/mon/OSDMonitor.h @@ -150,11 +150,15 @@ private: void update_from_paxos(bool *need_bootstrap); void create_pending(); // prepare a new pending void encode_pending(MonitorDBStore::Transaction *t); - virtual void encode_full(MonitorDBStore::Transaction *t); void on_active(); void on_shutdown(); /** + * we haven't delegated full version stashing to paxosservice for some time + * now, making this function useless in current context. + */ + virtual void encode_full(MonitorDBStore::Transaction *t) { } + /** * do not let paxosservice periodically stash full osdmaps, or we will break our * locally-managed full maps. (update_from_paxos loads the latest and writes them * out going forward from there, but if we just synced that may mean we skip some.) diff --git a/src/mon/PGMap.cc b/src/mon/PGMap.cc index 8b37376ba7a..86ad87bd929 100644 --- a/src/mon/PGMap.cc +++ b/src/mon/PGMap.cc @@ -788,12 +788,16 @@ void PGMap::print_summary(Formatter *f, ostream *out) const { std::stringstream ss; if (f) - f->open_object_section("pgs_by_state"); + f->open_array_section("pgs_by_state"); + for (hash_map<int,int>::const_iterator p = num_pg_by_state.begin(); p != num_pg_by_state.end(); ++p) { if (f) { - f->dump_unsigned(pg_state_string(p->first).c_str(), p->second); + f->open_object_section("pgs_by_state_element"); + f->dump_string("state_name", pg_state_string(p->first)); + f->dump_unsigned("count", p->second); + f->close_section(); } else { if (p != num_pg_by_state.begin()) ss << ", "; diff --git a/src/mon/PGMonitor.cc b/src/mon/PGMonitor.cc index 648a8fe2384..d86cbe70c19 100644 --- a/src/mon/PGMonitor.cc +++ b/src/mon/PGMonitor.cc @@ -255,6 +255,12 @@ void PGMonitor::update_from_paxos(bool *need_bootstrap) update_logger(); } +void PGMonitor::on_upgrade() +{ + dout(1) << __func__ << " discarding in-core PGMap" << dendl; + pg_map = PGMap(); +} + void PGMonitor::upgrade_format() { unsigned current = 1; diff --git a/src/mon/PGMonitor.h b/src/mon/PGMonitor.h index e8e1b4210aa..44015395e94 100644 --- a/src/mon/PGMonitor.h +++ b/src/mon/PGMonitor.h @@ -60,6 +60,7 @@ private: void create_initial(); void update_from_paxos(bool *need_bootstrap); void upgrade_format(); + void on_upgrade(); void post_paxos_update(); void handle_osd_timeouts(); void create_pending(); // prepare a new pending diff --git a/src/mon/Paxos.cc b/src/mon/Paxos.cc index 508669deef5..a543abed7ed 100644 --- a/src/mon/Paxos.cc +++ b/src/mon/Paxos.cc @@ -210,7 +210,7 @@ void Paxos::handle_collect(MMonPaxos *collect) // do we have an accepted but uncommitted value? // (it'll be at last_committed+1) bufferlist bl; - if (collect->last_committed == last_committed && + if (collect->last_committed <= last_committed && get_store()->exists(get_name(), last_committed+1)) { get_store()->get(get_name(), last_committed+1, bl); assert(bl.length() > 0); diff --git a/src/mon/PaxosService.cc b/src/mon/PaxosService.cc index d6e67a1c4b4..1b21689863b 100644 --- a/src/mon/PaxosService.cc +++ b/src/mon/PaxosService.cc @@ -114,7 +114,13 @@ void PaxosService::refresh(bool *need_bootstrap) // update cached versions cached_first_committed = mon->store->get(get_service_name(), first_committed_name); cached_last_committed = mon->store->get(get_service_name(), last_committed_name); - format_version = get_value("format_version"); + + version_t new_format = get_value("format_version"); + if (new_format != format_version) { + dout(1) << __func__ << " upgraded, format " << format_version << " -> " << new_format << dendl; + on_upgrade(); + } + format_version = new_format; dout(10) << __func__ << dendl; diff --git a/src/mon/PaxosService.h b/src/mon/PaxosService.h index 74d5a90494c..5321bebcace 100644 --- a/src/mon/PaxosService.h +++ b/src/mon/PaxosService.h @@ -459,6 +459,11 @@ public: virtual void upgrade_format() { } /** + * this is called when we detect the store has just upgraded underneath us + */ + virtual void on_upgrade() {} + + /** * Called when the Paxos system enters a Leader election. * * @remarks It's a courtesy method, in case the class implementing this diff --git a/src/os/FileStore.cc b/src/os/FileStore.cc index 17105c11d69..108a857ab9f 100644 --- a/src/os/FileStore.cc +++ b/src/os/FileStore.cc @@ -36,7 +36,7 @@ #endif #include "include/compat.h" -#include "include/fiemap.h" +#include "include/linux_fiemap.h" #include "common/xattr.h" #include "chain_xattr.h" @@ -2243,6 +2243,7 @@ int FileStore::_check_global_replay_guard(coll_t cid, if (r < 0) { dout(20) << __func__ << " no xattr" << dendl; assert(!m_filestore_fail_eio || r != -EIO); + TEMP_FAILURE_RETRY(::close(fd)); return 1; // no xattr } bufferlist bl; @@ -4304,9 +4305,6 @@ int FileStore::_collection_rename(const coll_t &cid, const coll_t &ncid, get_cdir(cid, old_coll, sizeof(old_coll)); get_cdir(ncid, new_coll, sizeof(new_coll)); - _set_global_replay_guard(cid, spos); - _set_replay_guard(cid, spos); - if (_check_replay_guard(cid, spos) < 0) { return 0; } @@ -4315,6 +4313,16 @@ int FileStore::_collection_rename(const coll_t &cid, const coll_t &ncid, return _collection_remove_recursive(cid, spos); } + if (!collection_exists(cid)) { + if (replaying) { + // already happened + return 0; + } else { + return -ENOENT; + } + } + _set_global_replay_guard(cid, spos); + int ret = 0; if (::rename(old_coll, new_coll)) { if (replaying && !btrfs_stable_commits && diff --git a/src/os/HashIndex.cc b/src/os/HashIndex.cc index 86a912bbef2..c279bab3a60 100644 --- a/src/os/HashIndex.cc +++ b/src/os/HashIndex.cc @@ -447,18 +447,7 @@ int HashIndex::complete_merge(const vector<string> &path, subdir_info_s info) { r = move_objects(path, dst); if (r < 0) return r; - - map<string,hobject_t> objects_dst; - r = list_objects(dst, 0, 0, &objects_dst); - if (r < 0) - return r; - set<string> subdirs; - r = list_subdirs(dst, &subdirs); - if (r < 0) - return r; - dstinfo.objs = objects_dst.size(); - dstinfo.subdirs = subdirs.size() - 1; - r = set_info(dst, dstinfo); + r = reset_attr(dst); if (r < 0) return r; r = remove_path(path); @@ -576,7 +565,7 @@ int HashIndex::complete_split(const vector<string> &path, subdir_info_s info) { if (r < 0) return r; info.objs = objects.size(); - r = set_info(path, info); + r = reset_attr(path); if (r < 0) return r; r = fsync_dir(path); diff --git a/src/os/LFNIndex.cc b/src/os/LFNIndex.cc index edf361a44f0..09d0f02267f 100644 --- a/src/os/LFNIndex.cc +++ b/src/os/LFNIndex.cc @@ -75,16 +75,19 @@ int LFNIndex::init() int LFNIndex::created(const hobject_t &hoid, const char *path) { + WRAP_RETRY( vector<string> path_comp; string short_name; - int r; r = decompose_full_path(path, &path_comp, 0, &short_name); if (r < 0) - return r; + goto out; r = lfn_created(path_comp, hoid, short_name); if (r < 0) - return r; - return _created(path_comp, hoid, short_name); + goto out; + r = _created(path_comp, hoid, short_name); + if (r < 0) + goto out; + ); } int LFNIndex::unlink(const hobject_t &hoid) diff --git a/src/os/ObjectStore.cc b/src/os/ObjectStore.cc index ae97b6b08d3..9d8b989225b 100644 --- a/src/os/ObjectStore.cc +++ b/src/os/ObjectStore.cc @@ -435,6 +435,7 @@ void ObjectStore::Transaction::dump(ceph::Formatter *f) f->dump_string("first", first); f->dump_string("last", last); } + break; default: f->dump_string("op_name", "unknown"); diff --git a/src/osd/ClassHandler.cc b/src/osd/ClassHandler.cc index 5af2ac01a0f..a9a920ba078 100644 --- a/src/osd/ClassHandler.cc +++ b/src/osd/ClassHandler.cc @@ -18,6 +18,11 @@ #undef dout_prefix #define dout_prefix *_dout + +#define CLS_PREFIX "libcls_" +#define CLS_SUFFIX ".so" + + int ClassHandler::open_class(const string& cname, ClassData **pcls) { Mutex::Locker lock(mutex); @@ -31,11 +36,43 @@ int ClassHandler::open_class(const string& cname, ClassData **pcls) return 0; } +int ClassHandler::open_all_classes() +{ + dout(10) << __func__ << dendl; + DIR *dir = ::opendir(g_conf->osd_class_dir.c_str()); + if (!dir) + return -errno; + + char buf[offsetof(struct dirent, d_name) + PATH_MAX + 1]; + struct dirent *pde; + int r = 0; + while ((r = ::readdir_r(dir, (dirent *)&buf, &pde)) == 0 && pde) { + if (pde->d_name[0] == '.') + continue; + if (strlen(pde->d_name) > sizeof(CLS_PREFIX) - 1 + sizeof(CLS_SUFFIX) - 1 && + strncmp(pde->d_name, CLS_PREFIX, sizeof(CLS_PREFIX) - 1) == 0 && + strcmp(pde->d_name + strlen(pde->d_name) - (sizeof(CLS_SUFFIX) - 1), CLS_SUFFIX) == 0) { + char cname[PATH_MAX + 1]; + strcpy(cname, pde->d_name + sizeof(CLS_PREFIX) - 1); + cname[strlen(cname) - (sizeof(CLS_SUFFIX) - 1)] = '\0'; + dout(10) << __func__ << " found " << cname << dendl; + ClassData *cls; + r = open_class(cname, &cls); + if (r < 0) + goto out; + } + } + out: + closedir(dir); + return r; +} + void ClassHandler::shutdown() { for (map<string, ClassData>::iterator p = classes.begin(); p != classes.end(); ++p) { dlclose(p->second.handle); } + classes.clear(); } ClassHandler::ClassData *ClassHandler::_get_class(const string& cname) @@ -63,7 +100,7 @@ int ClassHandler::_load_class(ClassData *cls) if (cls->status == ClassData::CLASS_UNKNOWN || cls->status == ClassData::CLASS_MISSING) { char fname[PATH_MAX]; - snprintf(fname, sizeof(fname), "%s/libcls_%s.so", + snprintf(fname, sizeof(fname), "%s/" CLS_PREFIX "%s" CLS_SUFFIX, g_conf->osd_class_dir.c_str(), cls->name.c_str()); dout(10) << "_load_class " << cls->name << " from " << fname << dendl; diff --git a/src/osd/ClassHandler.h b/src/osd/ClassHandler.h index 733ed01a35d..f7c80f9454b 100644 --- a/src/osd/ClassHandler.h +++ b/src/osd/ClassHandler.h @@ -78,6 +78,8 @@ private: public: ClassHandler() : mutex("ClassHandler") {} + int open_all_classes(); + int open_class(const string& cname, ClassData **pcls); ClassData *register_class(const char *cname); diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 3f226cec95d..e3a7c227e15 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -291,11 +291,13 @@ void OSDService::init_splits_between(pg_t pgid, // Ok, a split happened, so we need to walk the osdmaps set<pg_t> new_pgs; // pgs to scan on each map new_pgs.insert(pgid); + OSDMapRef curmap(get_map(frommap->get_epoch())); for (epoch_t e = frommap->get_epoch() + 1; e <= tomap->get_epoch(); ++e) { - OSDMapRef curmap(get_map(e-1)); - OSDMapRef nextmap(get_map(e)); + OSDMapRef nextmap(try_get_map(e)); + if (!nextmap) + continue; set<pg_t> even_newer_pgs; // pgs added in this loop for (set<pg_t>::iterator i = new_pgs.begin(); i != new_pgs.end(); ++i) { set<pg_t> split_pgs; @@ -307,7 +309,9 @@ void OSDService::init_splits_between(pg_t pgid, } } new_pgs.insert(even_newer_pgs.begin(), even_newer_pgs.end()); + curmap = nextmap; } + assert(curmap == tomap); // we must have had both frommap and tomap } } @@ -993,44 +997,44 @@ class OSDSocketHook : public AdminSocketHook { OSD *osd; public: OSDSocketHook(OSD *o) : osd(o) {} - bool call(std::string command, std::string args, bufferlist& out) { + bool call(std::string command, std::string args, std::string format, + bufferlist& out) { stringstream ss; - bool r = osd->asok_command(command, args, ss); + bool r = osd->asok_command(command, args, format, ss); out.append(ss); return r; } }; -bool OSD::asok_command(string command, string args, ostream& ss) +bool OSD::asok_command(string command, string args, string format, ostream& ss) { + if (format == "") + format = "json-pretty"; + Formatter *f = new_formatter(format); if (command == "dump_ops_in_flight") { - op_tracker.dump_ops_in_flight(ss); + op_tracker.dump_ops_in_flight(f); } else if (command == "dump_historic_ops") { - op_tracker.dump_historic_ops(ss); + op_tracker.dump_historic_ops(f); } else if (command == "dump_op_pq_state") { - JSONFormatter f(true); - f.open_object_section("pq"); - op_wq.dump(&f); - f.close_section(); - f.flush(ss); + f->open_object_section("pq"); + op_wq.dump(f); + f->close_section(); } else if (command == "dump_blacklist") { list<pair<entity_addr_t,utime_t> > bl; OSDMapRef curmap = service.get_osdmap(); - JSONFormatter f(true); - f.open_array_section("blacklist"); + f->open_array_section("blacklist"); curmap->get_blacklist(&bl); for (list<pair<entity_addr_t,utime_t> >::iterator it = bl.begin(); it != bl.end(); ++it) { - f.open_array_section("entry"); - f.open_object_section("entity_addr_t"); - it->first.dump(&f); - f.close_section(); //entity_addr_t - it->second.localtime(f.dump_stream("expire_time")); - f.close_section(); //entry - } - f.close_section(); //blacklist - f.flush(ss); + f->open_array_section("entry"); + f->open_object_section("entity_addr_t"); + it->first.dump(f); + f->close_section(); //entity_addr_t + it->second.localtime(f->dump_stream("expire_time")); + f->close_section(); //entry + } + f->close_section(); //blacklist } else if (command == "dump_watchers") { list<obj_watch_item_t> watchers; osd_lock.Lock(); @@ -1048,35 +1052,35 @@ bool OSD::asok_command(string command, string args, ostream& ss) } osd_lock.Unlock(); - JSONFormatter f(true); - f.open_array_section("watchers"); + f->open_array_section("watchers"); for (list<obj_watch_item_t>::iterator it = watchers.begin(); it != watchers.end(); ++it) { - f.open_array_section("watch"); + f->open_array_section("watch"); - f.dump_string("namespace", it->obj.nspace); - f.dump_string("object", it->obj.oid.name); + f->dump_string("namespace", it->obj.nspace); + f->dump_string("object", it->obj.oid.name); - f.open_object_section("entity_name"); - it->wi.name.dump(&f); - f.close_section(); //entity_name_t + f->open_object_section("entity_name"); + it->wi.name.dump(f); + f->close_section(); //entity_name_t - f.dump_int("cookie", it->wi.cookie); - f.dump_int("timeout", it->wi.timeout_seconds); + f->dump_int("cookie", it->wi.cookie); + f->dump_int("timeout", it->wi.timeout_seconds); - f.open_object_section("entity_addr_t"); - it->wi.addr.dump(&f); - f.close_section(); //entity_addr_t + f->open_object_section("entity_addr_t"); + it->wi.addr.dump(f); + f->close_section(); //entity_addr_t - f.close_section(); //watch + f->close_section(); //watch } - f.close_section(); //watches - f.flush(ss); + f->close_section(); //watches } else { assert(0 == "broken asok registration"); } + f->flush(ss); + delete f; return true; } @@ -1085,7 +1089,8 @@ class TestOpsSocketHook : public AdminSocketHook { ObjectStore *store; public: TestOpsSocketHook(OSDService *s, ObjectStore *st) : service(s), store(st) {} - bool call(std::string command, std::string args, bufferlist& out) { + bool call(std::string command, std::string args, std::string format, + bufferlist& out) { stringstream ss; test_ops(service, store, command, args, ss); out.append(ss); @@ -1162,6 +1167,12 @@ int OSD::init() class_handler = new ClassHandler(); cls_initialize(class_handler); + if (g_conf->osd_open_classes_on_start) { + int r = class_handler->open_all_classes(); + if (r) + dout(1) << "warning: got an error loading one or more classes: " << cpp_strerror(r) << dendl; + } + // load up "current" osdmap assert_warn(!osdmap); if (osdmap) { @@ -3808,51 +3819,90 @@ void OSD::handle_command(MCommand *m) struct OSDCommand { string cmdstring; string helpstring; + string module; + string perm; + string availability; } osd_commands[] = { -#define COMMAND(parsesig, helptext) \ - {parsesig, helptext}, +#define COMMAND(parsesig, helptext, module, perm, availability) \ + {parsesig, helptext, module, perm, availability}, // yes, these are really pg commands, but there's a limit to how -// much work it's worth. The OSD returns all of them. +// much work it's worth. The OSD returns all of them. Make this +// form (pg <pgid> <cmd>) valid only for the cli. +// Rest uses "tell <pgid> <cmd>" COMMAND("pg " \ "name=pgid,type=CephPgid " \ "name=cmd,type=CephChoices,strings=query", \ - "show details of a specific pg") + "show details of a specific pg", "osd", "r", "cli") COMMAND("pg " \ "name=pgid,type=CephPgid " \ "name=cmd,type=CephChoices,strings=mark_unfound_lost " \ "name=mulcmd,type=CephChoices,strings=revert", \ - "mark all unfound objects in this pg as lost, either removing or reverting to a prior version if one is available") + "mark all unfound objects in this pg as lost, either removing or reverting to a prior version if one is available", + "osd", "rw", "cli") COMMAND("pg " \ "name=pgid,type=CephPgid " \ "name=cmd,type=CephChoices,strings=list_missing " \ "name=offset,type=CephString,req=false", - "list missing objects on this pg, perhaps starting at an offset given in JSON") + "list missing objects on this pg, perhaps starting at an offset given in JSON", + "osd", "r", "cli") -COMMAND("version", "report version of OSD") +// new form: tell <pgid> <cmd> for both cli and rest + +COMMAND("query", + "show details of a specific pg", "osd", "r", "cli,rest") +COMMAND("mark_unfound_lost " \ + "name=mulcmd,type=CephChoices,strings=revert", \ + "mark all unfound objects in this pg as lost, either removing or reverting to a prior version if one is available", + "osd", "rw", "cli,rest") +COMMAND("list_missing " \ + "name=offset,type=CephString,req=false", + "list missing objects on this pg, perhaps starting at an offset given in JSON", + "osd", "r", "cli,rest") + +// tell <osd.n> commands. Validation of osd.n must be special-cased in client + +// tell <osd.n> commands. Validation of osd.n must be special-cased in client +COMMAND("version", "report version of OSD", "osd", "r", "cli,rest") COMMAND("injectargs " \ "name=injected_args,type=CephString,n=N", - "inject configuration arguments into running OSD") + "inject configuration arguments into running OSD", + "osd", "rw", "cli,rest") COMMAND("bench " \ "name=count,type=CephInt,req=false " \ "name=size,type=CephInt,req=false ", \ "OSD benchmark: write <count> <size>-byte objects, " \ - "(default 1G size 4MB). Results in log.") -COMMAND("flush_pg_stats", "flush pg stats") -COMMAND("debug dump_missing " \ + "(default 1G size 4MB). Results in log.", + "osd", "rw", "cli,rest") +COMMAND("flush_pg_stats", "flush pg stats", "osd", "rw", "cli,rest") +COMMAND("debug_dump_missing " \ "name=filename,type=CephFilepath", - "dump missing objects to a named file") + "dump missing objects to a named file", "osd", "r", "cli,rest") COMMAND("debug kick_recovery_wq " \ "name=delay,type=CephInt,range=0", - "set osd_recovery_delay_start to <val>") + "set osd_recovery_delay_start to <val>", "osd", "rw", "cli,rest") COMMAND("cpu_profiler " \ "name=arg,type=CephChoices,strings=status|flush", - "run cpu profiling on daemon") -COMMAND("dump_pg_recovery_stats", "dump pg recovery statistics") -COMMAND("reset_pg_recovery_stats", "reset pg recovery statistics") - + "run cpu profiling on daemon", "osd", "rw", "cli,rest") +COMMAND("dump_pg_recovery_stats", "dump pg recovery statistics", + "osd", "r", "cli,rest") +COMMAND("reset_pg_recovery_stats", "reset pg recovery statistics", + "osd", "rw", "cli,rest") + +// experiment: restate pg commands as "tell <pgid>". Validation of +// pgid must be special-cased in client. +COMMAND("query", + "show details of a specific pg", "osd", "r", "cli,rest") +COMMAND("mark_unfound_lost revert " \ + "name=mulcmd,type=CephChoices,strings=revert", \ + "mark all unfound objects in this pg as lost, either removing or reverting to a prior version if one is available", + "osd", "rw", "cli,rest") +COMMAND("list_missing " \ + "name=offset,type=CephString,req=false", + "list missing objects on this pg, perhaps starting at an offset given in JSON", + "osd", "rw", "cli,rest") }; void OSD::do_command(Connection *con, tid_t tid, vector<string>& cmd, bufferlist& data) @@ -3866,6 +3916,9 @@ void OSD::do_command(Connection *con, tid_t tid, vector<string>& cmd, bufferlist map<string, cmd_vartype> cmdmap; string prefix; + string format; + string pgidstr; + boost::scoped_ptr<Formatter> f; if (cmd.empty()) { ss << "no command given"; @@ -3888,8 +3941,8 @@ void OSD::do_command(Connection *con, tid_t tid, vector<string>& cmd, bufferlist ostringstream secname; secname << "cmd" << setfill('0') << std::setw(3) << cmdnum; - dump_cmd_and_help_to_json(f, secname.str(), - cp->cmdstring, cp->helpstring); + dump_cmddesc_to_json(f, secname.str(), cp->cmdstring, cp->helpstring, + cp->module, cp->perm, cp->availability); cmdnum++; } f->close_section(); // command_descriptions @@ -3900,8 +3953,18 @@ void OSD::do_command(Connection *con, tid_t tid, vector<string>& cmd, bufferlist goto out; } + cmd_getval(g_ceph_context, cmdmap, "format", format); + f.reset(new_formatter(format)); + if (prefix == "version") { - ds << pretty_version_to_str(); + if (f) { + f->open_object_section("version"); + f->dump_string("version", pretty_version_to_str()); + f->close_section(); + f->flush(ds); + } else { + ds << pretty_version_to_str(); + } goto out; } else if (prefix == "injectargs") { @@ -3921,9 +3984,16 @@ void OSD::do_command(Connection *con, tid_t tid, vector<string>& cmd, bufferlist osd_lock.Lock(); } - else if (prefix == "pg") { + // either 'pg <pgid> <command>' or + // 'tell <pgid>' (which comes in without any of that prefix)? + + else if (prefix == "pg" || + (cmd_getval(g_ceph_context, cmdmap, "pgid", pgidstr) && + (prefix == "query" || + prefix == "mark_unfound_lost" || + prefix == "list_missing") + )) { pg_t pgid; - string pgidstr; if (!cmd_getval(g_ceph_context, cmdmap, "pgid", pgidstr)) { ss << "no pgid specified"; @@ -3932,14 +4002,15 @@ void OSD::do_command(Connection *con, tid_t tid, vector<string>& cmd, bufferlist ss << "couldn't parse pgid '" << pgidstr << "'"; r = -EINVAL; } else { - vector<string> args; - cmd_getval(g_ceph_context, cmdmap, "args", args); PG *pg = _lookup_lock_pg(pgid); if (!pg) { ss << "i don't have pgid " << pgid; r = -ENOENT; } else { - r = pg->do_command(cmd, ss, data, odata); + // simulate pg <pgid> cmd= for pg->do-command + if (prefix != "pg") + cmd_putval(g_ceph_context, cmdmap, "cmd", prefix); + r = pg->do_command(cmdmap, ss, data, odata); pg->unlock(); } } @@ -3978,9 +4049,18 @@ void OSD::do_command(Connection *con, tid_t tid, vector<string>& cmd, bufferlist store->queue_transaction(NULL, cleanupt); uint64_t rate = (double)count / (end - start); - ss << "bench: wrote " << prettybyte_t(count) - << " in blocks of " << prettybyte_t(bsize) << " in " - << (end-start) << " sec at " << prettybyte_t(rate) << "/sec"; + if (f) { + f->open_object_section("osd_bench_results"); + f->dump_int("bytes_written", count); + f->dump_int("blocksize", bsize); + f->dump_float("bytes_per_sec", rate); + f->close_section(); + f->flush(ss); + } else { + ss << "bench: wrote " << prettybyte_t(count) + << " in blocks of " << prettybyte_t(bsize) << " in " + << (end-start) << " sec at " << prettybyte_t(rate) << "/sec"; + } } else if (prefix == "flush_pg_stats") { @@ -4077,8 +4157,13 @@ void OSD::do_command(Connection *con, tid_t tid, vector<string>& cmd, bufferlist else if (prefix == "dump_pg_recovery_stats") { stringstream s; - pg_recovery_stats.dump(s); - ds << "dump pg recovery stats: " << s.str(); + if (f) { + pg_recovery_stats.dump_formatted(f.get()); + f->flush(ds); + } else { + pg_recovery_stats.dump(s); + ds << "dump pg recovery stats: " << s.str(); + } } else if (prefix == "reset_pg_recovery_stats") { @@ -5177,7 +5262,9 @@ void OSD::advance_pg( for (; next_epoch <= osd_epoch; ++next_epoch) { - OSDMapRef nextmap = get_map(next_epoch); + OSDMapRef nextmap = service.try_get_map(next_epoch); + if (!nextmap) + continue; vector<int> newup, newacting; nextmap->pg_to_up_acting_osds(pg->info.pgid, newup, newacting); @@ -5511,7 +5598,7 @@ OSDMapRef OSDService::_add_map(OSDMap *o) return l; } -OSDMapRef OSDService::get_map(epoch_t epoch) +OSDMapRef OSDService::try_get_map(epoch_t epoch) { Mutex::Locker l(map_cache_lock); OSDMapRef retval = map_cache.lookup(epoch); @@ -5524,7 +5611,10 @@ OSDMapRef OSDService::get_map(epoch_t epoch) if (epoch > 0) { dout(20) << "get_map " << epoch << " - loading and decoding " << map << dendl; bufferlist bl; - assert(_get_map_bl(epoch, bl)); + if (!_get_map_bl(epoch, bl)) { + delete map; + return OSDMapRef(); + } map->decode(bl); } else { dout(20) << "get_map " << epoch << " - return initial " << map << dendl; diff --git a/src/osd/OSD.h b/src/osd/OSD.h index 04ad4dcd7d7..5bcff7442d7 100644 --- a/src/osd/OSD.h +++ b/src/osd/OSD.h @@ -441,7 +441,12 @@ public: SimpleLRU<epoch_t, bufferlist> map_bl_cache; SimpleLRU<epoch_t, bufferlist> map_bl_inc_cache; - OSDMapRef get_map(epoch_t e); + OSDMapRef try_get_map(epoch_t e); + OSDMapRef get_map(epoch_t e) { + OSDMapRef ret(try_get_map(e)); + assert(ret); + return ret; + } OSDMapRef add_map(OSDMap *o) { Mutex::Locker l(map_cache_lock); return _add_map(o); @@ -617,7 +622,7 @@ protected: // asok friend class OSDSocketHook; class OSDSocketHook *asok_hook; - bool asok_command(string command, string args, ostream& ss); + bool asok_command(string command, string args, string format, ostream& ss); public: ClassHandler *class_handler; diff --git a/src/osd/OpRequest.cc b/src/osd/OpRequest.cc index 3b8a8714d92..a6cdc9ecffb 100644 --- a/src/osd/OpRequest.cc +++ b/src/osd/OpRequest.cc @@ -76,31 +76,27 @@ void OpHistory::dump_ops(utime_t now, Formatter *f) f->close_section(); } -void OpTracker::dump_historic_ops(ostream &ss) +void OpTracker::dump_historic_ops(Formatter *f) { - JSONFormatter jf(true); Mutex::Locker locker(ops_in_flight_lock); utime_t now = ceph_clock_now(g_ceph_context); - history.dump_ops(now, &jf); - jf.flush(ss); + history.dump_ops(now, f); } -void OpTracker::dump_ops_in_flight(ostream &ss) +void OpTracker::dump_ops_in_flight(Formatter *f) { - JSONFormatter jf(true); Mutex::Locker locker(ops_in_flight_lock); - jf.open_object_section("ops_in_flight"); // overall dump - jf.dump_int("num_ops", ops_in_flight.size()); - jf.open_array_section("ops"); // list of OpRequests + f->open_object_section("ops_in_flight"); // overall dump + f->dump_int("num_ops", ops_in_flight.size()); + f->open_array_section("ops"); // list of OpRequests utime_t now = ceph_clock_now(g_ceph_context); for (xlist<OpRequest*>::iterator p = ops_in_flight.begin(); !p.end(); ++p) { - jf.open_object_section("op"); - (*p)->dump(now, &jf); - jf.close_section(); // this OpRequest + f->open_object_section("op"); + (*p)->dump(now, f); + f->close_section(); // this OpRequest } - jf.close_section(); // list of OpRequests - jf.close_section(); // overall dump - jf.flush(ss); + f->close_section(); // list of OpRequests + f->close_section(); // overall dump } void OpTracker::register_inflight_op(xlist<OpRequest*>::item *i) diff --git a/src/osd/OpRequest.h b/src/osd/OpRequest.h index 47b050b8538..a2014472432 100644 --- a/src/osd/OpRequest.h +++ b/src/osd/OpRequest.h @@ -59,8 +59,8 @@ class OpTracker { public: OpTracker() : seq(0), ops_in_flight_lock("OpTracker mutex") {} - void dump_ops_in_flight(std::ostream& ss); - void dump_historic_ops(std::ostream& ss); + void dump_ops_in_flight(Formatter *f); + void dump_historic_ops(Formatter *f); void register_inflight_op(xlist<OpRequest*>::item *i); void unregister_inflight_op(OpRequest *i); diff --git a/src/osd/PG.cc b/src/osd/PG.cc index 7373357db11..9f957b8e054 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -5032,7 +5032,6 @@ void PG::handle_advance_map(OSDMapRef osdmap, OSDMapRef lastmap, vector<int>& newup, vector<int>& newacting, RecoveryCtx *rctx) { - assert(osdmap->get_epoch() == (lastmap->get_epoch() + 1)); assert(lastmap->get_epoch() == osdmap_ref->get_epoch()); assert(lastmap == osdmap_ref); dout(10) << "handle_advance_map " << newup << "/" << newacting << dendl; diff --git a/src/osd/PG.h b/src/osd/PG.h index 819c9c62f62..10e9a2544a9 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -43,8 +43,10 @@ #include "msg/Messenger.h" #include "messages/MOSDRepScrub.h" #include "messages/MOSDPGLog.h" +#include "common/cmdparse.h" #include "common/tracked_int_ptr.hpp" #include "common/WorkQueue.h" +#include "include/str_list.h" #include <list> #include <memory> @@ -108,10 +110,36 @@ struct PGRecoveryStats { << i.total_time << "\t" << i.min_time << "\t" << i.max_time << "\t" << p->first << "\n"; - } } + void dump_formatted(Formatter *f) { + Mutex::Locker l(lock); + f->open_array_section("pg_recovery_stats"); + for (map<const char *,per_state_info>::iterator p = info.begin(); + p != info.end(); ++p) { + per_state_info& i = p->second; + f->open_object_section("recovery_state"); + f->dump_int("enter", i.enter); + f->dump_int("exit", i.exit); + f->dump_int("events", i.events); + f->dump_stream("event_time") << i.event_time; + f->dump_stream("total_time") << i.total_time; + f->dump_stream("min_time") << i.min_time; + f->dump_stream("max_time") << i.max_time; + vector<string> states; + get_str_vec(p->first, "/", states); + f->open_array_section("nested_states"); + for (vector<string>::iterator st = states.begin(); + st != states.end(); ++st) { + f->dump_string("state", *st); + } + f->close_section(); + f->close_section(); + } + f->close_section(); + } + void log_enter(const char *s) { Mutex::Locker l(lock); info[s].enter++; @@ -1786,7 +1814,7 @@ public: virtual void do_push_reply(OpRequestRef op) = 0; virtual void snap_trimmer() = 0; - virtual int do_command(vector<string>& cmd, ostream& ss, + virtual int do_command(cmdmap_t cmdmap, ostream& ss, bufferlist& idata, bufferlist& odata) = 0; virtual bool same_for_read_since(epoch_t e) = 0; diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index 298d38d6ace..658ea7cb746 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -268,57 +268,43 @@ int ReplicatedPG::get_pgls_filter(bufferlist::iterator& iter, PGLSFilter **pfilt // ========================================================== -int ReplicatedPG::do_command(vector<string>& cmd, ostream& ss, +int ReplicatedPG::do_command(cmdmap_t cmdmap, ostream& ss, bufferlist& idata, bufferlist& odata) { const pg_missing_t &missing = pg_log.get_missing(); - map<string, cmd_vartype> cmdmap; string prefix; + string format; - if (cmd.empty()) { - ss << "no command given"; - return -EINVAL; - } - - stringstream ss2; - if (!cmdmap_from_json(cmd, &cmdmap, ss2)) { - ss << ss2.str(); - return -EINVAL; - } - - cmd_getval(g_ceph_context, cmdmap, "prefix", prefix); - if (prefix != "pg") { - ss << "ReplicatedPG::do_command: not pg command"; - return -EINVAL; - } + cmd_getval(g_ceph_context, cmdmap, "format", format); + boost::scoped_ptr<Formatter> f(new_formatter(format)); + // demand that we have a formatter + if (!f) + f.reset(new_formatter("json")); string command; cmd_getval(g_ceph_context, cmdmap, "cmd", command); if (command == "query") { - JSONFormatter jsf(true); - jsf.open_object_section("pg"); - jsf.dump_string("state", pg_state_string(get_state())); - jsf.dump_unsigned("epoch", get_osdmap()->get_epoch()); - jsf.open_array_section("up"); + f->open_object_section("pg"); + f->dump_string("state", pg_state_string(get_state())); + f->dump_unsigned("epoch", get_osdmap()->get_epoch()); + f->open_array_section("up"); for (vector<int>::iterator p = up.begin(); p != up.end(); ++p) - jsf.dump_unsigned("osd", *p); - jsf.close_section(); - jsf.open_array_section("acting"); + f->dump_unsigned("osd", *p); + f->close_section(); + f->open_array_section("acting"); for (vector<int>::iterator p = acting.begin(); p != acting.end(); ++p) - jsf.dump_unsigned("osd", *p); - jsf.close_section(); - jsf.open_object_section("info"); - info.dump(&jsf); - jsf.close_section(); - - jsf.open_array_section("recovery_state"); - handle_query_state(&jsf); - jsf.close_section(); - - jsf.close_section(); - stringstream dss; - jsf.flush(dss); - odata.append(dss); + f->dump_unsigned("osd", *p); + f->close_section(); + f->open_object_section("info"); + info.dump(f.get()); + f->close_section(); + + f->open_array_section("recovery_state"); + handle_query_state(f.get()); + f->close_section(); + + f->close_section(); + f->flush(odata); return 0; } else if (command == "mark_unfound_lost") { @@ -352,7 +338,6 @@ int ReplicatedPG::do_command(vector<string>& cmd, ostream& ss, return 0; } else if (command == "list_missing") { - JSONFormatter jf(true); hobject_t offset; string offset_json; if (cmd_getval(g_ceph_context, cmdmap, "offset", offset_json)) { @@ -366,50 +351,48 @@ int ReplicatedPG::do_command(vector<string>& cmd, ostream& ss, return -EINVAL; } } - jf.open_object_section("missing"); + f->open_object_section("missing"); { - jf.open_object_section("offset"); - offset.dump(&jf); - jf.close_section(); + f->open_object_section("offset"); + offset.dump(f.get()); + f->close_section(); } - jf.dump_int("num_missing", missing.num_missing()); - jf.dump_int("num_unfound", get_num_unfound()); + f->dump_int("num_missing", missing.num_missing()); + f->dump_int("num_unfound", get_num_unfound()); map<hobject_t,pg_missing_t::item>::const_iterator p = missing.missing.upper_bound(offset); { - jf.open_array_section("objects"); + f->open_array_section("objects"); int32_t num = 0; bufferlist bl; while (p != missing.missing.end() && num < g_conf->osd_command_max_records) { - jf.open_object_section("object"); + f->open_object_section("object"); { - jf.open_object_section("oid"); - p->first.dump(&jf); - jf.close_section(); + f->open_object_section("oid"); + p->first.dump(f.get()); + f->close_section(); } - p->second.dump(&jf); // have, need keys + p->second.dump(f.get()); // have, need keys { - jf.open_array_section("locations"); + f->open_array_section("locations"); map<hobject_t,set<int> >::iterator q = missing_loc.find(p->first); if (q != missing_loc.end()) for (set<int>::iterator r = q->second.begin(); r != q->second.end(); ++r) - jf.dump_int("osd", *r); - jf.close_section(); + f->dump_int("osd", *r); + f->close_section(); } - jf.close_section(); + f->close_section(); ++p; num++; } - jf.close_section(); + f->close_section(); } - jf.dump_int("more", p != missing.missing.end()); - jf.close_section(); - stringstream jss; - jf.flush(jss); - odata.append(jss); + f->dump_int("more", p != missing.missing.end()); + f->close_section(); + f->flush(odata); return 0; }; - ss << "unknown command " << cmd; + ss << "unknown pg command " << prefix; return -EINVAL; } diff --git a/src/osd/ReplicatedPG.h b/src/osd/ReplicatedPG.h index 9dafe23faa1..7b70b4381ea 100644 --- a/src/osd/ReplicatedPG.h +++ b/src/osd/ReplicatedPG.h @@ -17,6 +17,7 @@ #include <boost/optional.hpp> #include "include/assert.h" +#include "common/cmdparse.h" #include "PG.h" #include "OSD.h" @@ -930,7 +931,8 @@ public: const hobject_t& ioid); ~ReplicatedPG() {} - int do_command(vector<string>& cmd, ostream& ss, bufferlist& idata, bufferlist& odata); + int do_command(cmdmap_t cmdmap, ostream& ss, bufferlist& idata, + bufferlist& odata); void do_op(OpRequestRef op); bool pg_op_must_wait(MOSDOp *op); diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index 3a6db4d8315..ca3dcc192b0 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -1141,6 +1141,13 @@ struct pg_history_t { epoch_t last_epoch_clean; // lower bound on last epoch the PG was completely clean. epoch_t last_epoch_split; // as parent + /** + * In the event of a map discontinuity, same_*_since may reflect the first + * map the osd has seen in the new map sequence rather than the actual start + * of the interval. This is ok since a discontinuity at epoch e means there + * must have been a clean interval between e and now and that we cannot be + * in the active set during the interval containing e. + */ epoch_t same_up_since; // same acting set since epoch_t same_interval_since; // same acting AND up set since epoch_t same_primary_since; // same primary at least back through this epoch. diff --git a/src/osdc/Objecter.cc b/src/osdc/Objecter.cc index 9933f853f8f..aefe74088e9 100644 --- a/src/osdc/Objecter.cc +++ b/src/osdc/Objecter.cc @@ -2166,154 +2166,154 @@ void Objecter::dump_active() } } -void Objecter::dump_requests(Formatter& fmt) const +void Objecter::dump_requests(Formatter *fmt) const { assert(client_lock.is_locked()); - fmt.open_object_section("requests"); + fmt->open_object_section("requests"); dump_ops(fmt); dump_linger_ops(fmt); dump_pool_ops(fmt); dump_pool_stat_ops(fmt); dump_statfs_ops(fmt); dump_command_ops(fmt); - fmt.close_section(); // requests object + fmt->close_section(); // requests object } -void Objecter::dump_ops(Formatter& fmt) const +void Objecter::dump_ops(Formatter *fmt) const { - fmt.open_array_section("ops"); + fmt->open_array_section("ops"); for (map<tid_t,Op*>::const_iterator p = ops.begin(); p != ops.end(); ++p) { Op *op = p->second; - fmt.open_object_section("op"); - fmt.dump_unsigned("tid", op->tid); - fmt.dump_stream("pg") << op->pgid; - fmt.dump_int("osd", op->session ? op->session->osd : -1); - fmt.dump_stream("last_sent") << op->stamp; - fmt.dump_int("attempts", op->attempts); - fmt.dump_stream("object_id") << op->oid; - fmt.dump_stream("object_locator") << op->oloc; - fmt.dump_stream("snapid") << op->snapid; - fmt.dump_stream("snap_context") << op->snapc; - fmt.dump_stream("mtime") << op->mtime; - - fmt.open_array_section("osd_ops"); + fmt->open_object_section("op"); + fmt->dump_unsigned("tid", op->tid); + fmt->dump_stream("pg") << op->pgid; + fmt->dump_int("osd", op->session ? op->session->osd : -1); + fmt->dump_stream("last_sent") << op->stamp; + fmt->dump_int("attempts", op->attempts); + fmt->dump_stream("object_id") << op->oid; + fmt->dump_stream("object_locator") << op->oloc; + fmt->dump_stream("snapid") << op->snapid; + fmt->dump_stream("snap_context") << op->snapc; + fmt->dump_stream("mtime") << op->mtime; + + fmt->open_array_section("osd_ops"); for (vector<OSDOp>::const_iterator it = op->ops.begin(); it != op->ops.end(); ++it) { - fmt.dump_stream("osd_op") << *it; + fmt->dump_stream("osd_op") << *it; } - fmt.close_section(); // osd_ops array + fmt->close_section(); // osd_ops array - fmt.close_section(); // op object + fmt->close_section(); // op object } - fmt.close_section(); // ops array + fmt->close_section(); // ops array } -void Objecter::dump_linger_ops(Formatter& fmt) const +void Objecter::dump_linger_ops(Formatter *fmt) const { - fmt.open_array_section("linger_ops"); + fmt->open_array_section("linger_ops"); for (map<uint64_t, LingerOp*>::const_iterator p = linger_ops.begin(); p != linger_ops.end(); ++p) { LingerOp *op = p->second; - fmt.open_object_section("linger_op"); - fmt.dump_unsigned("linger_id", op->linger_id); - fmt.dump_stream("pg") << op->pgid; - fmt.dump_int("osd", op->session ? op->session->osd : -1); - fmt.dump_stream("object_id") << op->oid; - fmt.dump_stream("object_locator") << op->oloc; - fmt.dump_stream("snapid") << op->snap; - fmt.dump_stream("registering") << op->snap; - fmt.dump_stream("registered") << op->snap; - fmt.close_section(); // linger_op object + fmt->open_object_section("linger_op"); + fmt->dump_unsigned("linger_id", op->linger_id); + fmt->dump_stream("pg") << op->pgid; + fmt->dump_int("osd", op->session ? op->session->osd : -1); + fmt->dump_stream("object_id") << op->oid; + fmt->dump_stream("object_locator") << op->oloc; + fmt->dump_stream("snapid") << op->snap; + fmt->dump_stream("registering") << op->snap; + fmt->dump_stream("registered") << op->snap; + fmt->close_section(); // linger_op object } - fmt.close_section(); // linger_ops array + fmt->close_section(); // linger_ops array } -void Objecter::dump_command_ops(Formatter& fmt) const +void Objecter::dump_command_ops(Formatter *fmt) const { - fmt.open_array_section("command_ops"); + fmt->open_array_section("command_ops"); for (map<uint64_t, CommandOp*>::const_iterator p = command_ops.begin(); p != command_ops.end(); ++p) { CommandOp *op = p->second; - fmt.open_object_section("command_op"); - fmt.dump_unsigned("command_id", op->tid); - fmt.dump_int("osd", op->session ? op->session->osd : -1); - fmt.open_array_section("command"); + fmt->open_object_section("command_op"); + fmt->dump_unsigned("command_id", op->tid); + fmt->dump_int("osd", op->session ? op->session->osd : -1); + fmt->open_array_section("command"); for (vector<string>::const_iterator q = op->cmd.begin(); q != op->cmd.end(); ++q) - fmt.dump_string("word", *q); - fmt.close_section(); + fmt->dump_string("word", *q); + fmt->close_section(); if (op->target_osd >= 0) - fmt.dump_int("target_osd", op->target_osd); + fmt->dump_int("target_osd", op->target_osd); else - fmt.dump_stream("target_pg") << op->target_pg; - fmt.close_section(); // command_op object + fmt->dump_stream("target_pg") << op->target_pg; + fmt->close_section(); // command_op object } - fmt.close_section(); // command_ops array + fmt->close_section(); // command_ops array } -void Objecter::dump_pool_ops(Formatter& fmt) const +void Objecter::dump_pool_ops(Formatter *fmt) const { - fmt.open_array_section("pool_ops"); + fmt->open_array_section("pool_ops"); for (map<tid_t, PoolOp*>::const_iterator p = pool_ops.begin(); p != pool_ops.end(); ++p) { PoolOp *op = p->second; - fmt.open_object_section("pool_op"); - fmt.dump_unsigned("tid", op->tid); - fmt.dump_int("pool", op->pool); - fmt.dump_string("name", op->name); - fmt.dump_int("operation_type", op->pool_op); - fmt.dump_unsigned("auid", op->auid); - fmt.dump_unsigned("crush_rule", op->crush_rule); - fmt.dump_stream("snapid") << op->snapid; - fmt.dump_stream("last_sent") << op->last_submit; - fmt.close_section(); // pool_op object + fmt->open_object_section("pool_op"); + fmt->dump_unsigned("tid", op->tid); + fmt->dump_int("pool", op->pool); + fmt->dump_string("name", op->name); + fmt->dump_int("operation_type", op->pool_op); + fmt->dump_unsigned("auid", op->auid); + fmt->dump_unsigned("crush_rule", op->crush_rule); + fmt->dump_stream("snapid") << op->snapid; + fmt->dump_stream("last_sent") << op->last_submit; + fmt->close_section(); // pool_op object } - fmt.close_section(); // pool_ops array + fmt->close_section(); // pool_ops array } -void Objecter::dump_pool_stat_ops(Formatter& fmt) const +void Objecter::dump_pool_stat_ops(Formatter *fmt) const { - fmt.open_array_section("pool_stat_ops"); + fmt->open_array_section("pool_stat_ops"); for (map<tid_t, PoolStatOp*>::const_iterator p = poolstat_ops.begin(); p != poolstat_ops.end(); ++p) { PoolStatOp *op = p->second; - fmt.open_object_section("pool_stat_op"); - fmt.dump_unsigned("tid", op->tid); - fmt.dump_stream("last_sent") << op->last_submit; + fmt->open_object_section("pool_stat_op"); + fmt->dump_unsigned("tid", op->tid); + fmt->dump_stream("last_sent") << op->last_submit; - fmt.open_array_section("pools"); + fmt->open_array_section("pools"); for (list<string>::const_iterator it = op->pools.begin(); it != op->pools.end(); ++it) { - fmt.dump_string("pool", *it); + fmt->dump_string("pool", *it); } - fmt.close_section(); // pool_op object + fmt->close_section(); // pool_op object - fmt.close_section(); // pool_stat_op object + fmt->close_section(); // pool_stat_op object } - fmt.close_section(); // pool_stat_ops array + fmt->close_section(); // pool_stat_ops array } -void Objecter::dump_statfs_ops(Formatter& fmt) const +void Objecter::dump_statfs_ops(Formatter *fmt) const { - fmt.open_array_section("statfs_ops"); + fmt->open_array_section("statfs_ops"); for (map<tid_t, StatfsOp*>::const_iterator p = statfs_ops.begin(); p != statfs_ops.end(); ++p) { StatfsOp *op = p->second; - fmt.open_object_section("statfs_op"); - fmt.dump_unsigned("tid", op->tid); - fmt.dump_stream("last_sent") << op->last_submit; - fmt.close_section(); // pool_stat_op object + fmt->open_object_section("statfs_op"); + fmt->dump_unsigned("tid", op->tid); + fmt->dump_stream("last_sent") << op->last_submit; + fmt->close_section(); // pool_stat_op object } - fmt.close_section(); // pool_stat_ops array + fmt->close_section(); // pool_stat_ops array } Objecter::RequestStateHook::RequestStateHook(Objecter *objecter) : @@ -2321,14 +2321,16 @@ Objecter::RequestStateHook::RequestStateHook(Objecter *objecter) : { } -bool Objecter::RequestStateHook::call(std::string command, std::string args, bufferlist& out) +bool Objecter::RequestStateHook::call(std::string command, std::string args, + std::string format, bufferlist& out) { stringstream ss; - JSONFormatter formatter(true); + Formatter *f = new_formatter(format); m_objecter->client_lock.Lock(); - m_objecter->dump_requests(formatter); + m_objecter->dump_requests(f); m_objecter->client_lock.Unlock(); - formatter.flush(ss); + f->flush(ss); + delete f; out.append(ss); return true; } diff --git a/src/osdc/Objecter.h b/src/osdc/Objecter.h index b593bef69d9..da5487f3a84 100644 --- a/src/osdc/Objecter.h +++ b/src/osdc/Objecter.h @@ -726,7 +726,8 @@ class Objecter { Objecter *m_objecter; public: RequestStateHook(Objecter *objecter); - bool call(std::string command, std::string args, bufferlist& out); + bool call(std::string command, std::string args, std::string format, + bufferlist& out); }; RequestStateHook *m_request_state_hook; @@ -1233,13 +1234,13 @@ private: * Output in-flight requests */ void dump_active(); - void dump_requests(Formatter& fmt) const; - void dump_ops(Formatter& fmt) const; - void dump_linger_ops(Formatter& fmt) const; - void dump_command_ops(Formatter& fmt) const; - void dump_pool_ops(Formatter& fmt) const; - void dump_pool_stat_ops(Formatter& fmt) const; - void dump_statfs_ops(Formatter& fmt) const; + void dump_requests(Formatter *fmt) const; + void dump_ops(Formatter *fmt) const; + void dump_linger_ops(Formatter *fmt) const; + void dump_command_ops(Formatter *fmt) const; + void dump_pool_ops(Formatter *fmt) const; + void dump_pool_stat_ops(Formatter *fmt) const; + void dump_statfs_ops(Formatter *fmt) const; int get_client_incarnation() const { return client_inc; } void set_client_incarnation(int inc) { client_inc = inc; } diff --git a/src/push_to_kclient.pl b/src/push_to_kclient.pl deleted file mode 100755 index f76a2bb91f4..00000000000 --- a/src/push_to_kclient.pl +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/perl - -use strict; - -my $usage = "./push_to_client.pl <path_to_kernel_git_tree>\n"; - -my $kernel = shift @ARGV || die $usage; -die $usage unless -d $kernel; -die $usage unless -e "$kernel/fs/ceph/README"; - -die "not in a git tree" unless `cd $kernel && git rev-parse HEAD`; - -my $dir = '.'; -until (-d "$dir/.git") { - $dir .= "/.."; -} - -print "pushing changed shared files from $dir to $kernel...\n"; -my @files = split(/\n/, `cat $kernel/fs/ceph/README`); -for (@files) { - next if /^#/; - my ($orig, $new) = split(/\s+/, $_); - #print "$dir/$orig -> $new\n"; - system "cp -v $dir/$orig $kernel/$new"; -} - -print "pulling changed shared files from $kernel to $dir...\n"; -system "cp -v $kernel/fs/ceph/ioctl.h $dir/src/client/ioctl.h"; -system "cp -v $kernel/fs/btrfs/ioctl.h $dir/src/os/btrfs_ioctl.h"; - -print "done.\n"; - diff --git a/src/pybind/ceph_argparse.py b/src/pybind/ceph_argparse.py index 73d1115f645..9a9db6758a5 100644 --- a/src/pybind/ceph_argparse.py +++ b/src/pybind/ceph_argparse.py @@ -263,6 +263,8 @@ class CephIPAddr(CephArgtype): if p is not None and long(p) > 65535: raise ArgumentValid("{0} not a valid port number".format(p)) self.val = s + self.addr = a + self.port = p def __str__(self): return '<IPaddr[:port]>' @@ -274,6 +276,7 @@ class CephEntityAddr(CephIPAddr): def valid(self, s, partial=False): ip, nonce = s.split('/') super(self.__class__, self).valid(ip) + self.nonce = nonce self.val = s def __str__(self): @@ -824,11 +827,10 @@ def validate(args, signature, partial=False): raise ArgumentError("unused arguments: " + str(myargs)) return d -def validate_command(parsed_args, sigdict, args, verbose=False): +def validate_command(sigdict, args, verbose=False): """ turn args into a valid dictionary ready to be sent off as JSON, validated against sigdict. - parsed_args is the namespace back from argparse """ found = [] valid_dict = {} @@ -886,11 +888,51 @@ def validate_command(parsed_args, sigdict, args, verbose=False): print >> sys.stderr, concise_sig(cmd['sig']) return None - if parsed_args.output_format: - valid_dict['format'] = parsed_args.output_format - return valid_dict +def find_cmd_target(childargs): + """ + Using a minimal validation, figure out whether the command + should be sent to a monitor or an osd. We do this before even + asking for the 'real' set of command signatures, so we can ask the + right daemon. + Returns ('osd', osdid), ('pg', pgid), or ('mon', '') + """ + sig = parse_funcsig(['tell', {'name':'target', 'type':'CephName'}]) + try: + valid_dict = validate(childargs, sig, partial=True) + except ArgumentError: + pass + else: + if len(valid_dict) == 2: + # revalidate to isolate type and id + name = CephName() + # if this fails, something is horribly wrong, as it just + # validated successfully above + name.valid(valid_dict['target']) + return name.nametype, name.nameid + + sig = parse_funcsig(['tell', {'name':'pgid', 'type':'CephPgid'}]) + try: + valid_dict = validate(childargs, sig, partial=True) + except ArgumentError: + pass + else: + if len(valid_dict) == 2: + # pg doesn't need revalidation; the string is fine + return 'pg', valid_dict['pgid'] + + sig = parse_funcsig(['pg', {'name':'pgid', 'type':'CephPgid'}]) + try: + valid_dict = validate(childargs, sig, partial=True) + except ArgumentError: + pass + else: + if len(valid_dict) == 2: + return 'pg', valid_dict['pgid'] + + return 'mon', '' + def send_command(cluster, target=('mon', ''), cmd=None, inbuf='', timeout=0, verbose=False): """ @@ -916,8 +958,15 @@ def send_command(cluster, target=('mon', ''), cmd=None, inbuf='', timeout=0, cluster.osd_command(osdid, cmd, inbuf, timeout) elif target[0] == 'pg': - # leave it in cmddict for the OSD to use too pgid = target[1] + # pgid will already be in the command for the pg <pgid> + # form, but for tell <pgid>, we need to put it in + if cmd: + cmddict = json.loads(cmd[0]) + cmddict['pgid'] = pgid + else: + cmddict = dict(pgid=pgid) + cmd = [json.dumps(cmddict)] if verbose: print >> sys.stderr, 'submit {0} for pgid {1}'.\ format(cmd, pgid) diff --git a/src/pybind/ceph_rest_api.py b/src/pybind/ceph_rest_api.py index fdfe84ee3cb..59e3f60a3a7 100755 --- a/src/pybind/ceph_rest_api.py +++ b/src/pybind/ceph_rest_api.py @@ -1,12 +1,13 @@ #!/usr/bin/python # vim: ts=4 sw=4 smarttab expandtab -import os import collections -import ConfigParser +import contextlib +import errno import json import logging import logging.handlers +import os import rados import textwrap import xml.etree.ElementTree @@ -16,16 +17,19 @@ import flask from ceph_argparse import * # -# Globals +# Globals and defaults # -APPNAME = '__main__' +DEFAULT_ADDR = '0.0.0.0' +DEFAULT_PORT = '5000' +DEFAULT_ID = 'restapi' + DEFAULT_BASEURL = '/api/v0.1' -DEFAULT_ADDR = '0.0.0.0:5000' DEFAULT_LOG_LEVEL = 'warning' -DEFAULT_CLIENTNAME = 'client.restapi' -DEFAULT_LOG_FILE = '/var/log/ceph/' + DEFAULT_CLIENTNAME + '.log' +# default client name will be 'client.<DEFAULT_ID>' +# 'app' must be global for decorators, etc. +APPNAME = '__main__' app = flask.Flask(APPNAME) LOGLEVELS = { @@ -36,130 +40,109 @@ LOGLEVELS = { 'debug':logging.DEBUG, } -# my globals, in a named tuple for usage clarity - -glob = collections.namedtuple('gvars', 'cluster urls sigdict baseurl') -glob.cluster = None -glob.urls = {} -glob.sigdict = {} -glob.baseurl = '' - -def load_conf(clustername='ceph', conffile=None): - import contextlib - - - class _TrimIndentFile(object): - def __init__(self, fp): - self.fp = fp - - def readline(self): - line = self.fp.readline() - return line.lstrip(' \t') - - - def _optionxform(s): - s = s.replace('_', ' ') - s = '_'.join(s.split()) - return s - - - def parse(fp): - cfg = ConfigParser.RawConfigParser() - cfg.optionxform = _optionxform - ifp = _TrimIndentFile(fp) - cfg.readfp(ifp) - return cfg - - - def load(path): - f = file(path) - with contextlib.closing(f): - return parse(f) - - if conffile: - # from CEPH_CONF - return load(conffile) - else: - for path in [ - '/etc/ceph/{0}.conf'.format(clustername), - os.path.expanduser('~/.ceph/{0}.conf'.format(clustername)), - '{0}.conf'.format(clustername), - ]: - if os.path.exists(path): - return load(path) +def find_up_osd(app): + ''' + Find an up OSD. Return the last one that's up. + Returns id as an int. + ''' + ret, outbuf, outs = json_command(app.ceph_cluster, prefix="osd dump", + argdict=dict(format='json')) + if ret: + raise EnvironmentError(ret, 'Can\'t get osd dump output') + try: + osddump = json.loads(outbuf) + except: + raise EnvironmentError(errno.EINVAL, 'Invalid JSON back from osd dump') + osds = [osd['osd'] for osd in osddump['osds'] if osd['up']] + if not osds: + raise EnvironmentError(errno.ENOENT, 'No up OSDs found') + return int(osds[-1]) - raise EnvironmentError('No conf file found for "{0}"'.format(clustername)) -def get_conf(cfg, clientname, key): - try: - return cfg.get(clientname, 'restapi_' + key) - except ConfigParser.NoOptionError: - return None +METHOD_DICT = {'r':['GET'], 'w':['PUT', 'DELETE']} -# XXX this is done globally, and cluster connection kept open; there -# are facilities to pass around global info to requests and to -# tear down connections between requests if it becomes important +def api_setup(app, conf, cluster, clientname, clientid, args): + ''' + This is done globally, and cluster connection kept open for + the lifetime of the daemon. librados should assure that even + if the cluster goes away and comes back, our connection remains. -def api_setup(): - """ Initialize the running instance. Open the cluster, get the command - signatures, module, perms, and help; stuff them away in the glob.urls - dict. - """ + signatures, module, perms, and help; stuff them away in the app.ceph_urls + dict. Also save app.ceph_sigdict for help() handling. + ''' + def get_command_descriptions(cluster, target=('mon','')): + ret, outbuf, outs = json_command(cluster, target, + prefix='get_command_descriptions', + timeout=30) + if ret: + err = "Can't get command descriptions: {0}".format(outs) + app.logger.error(err) + raise EnvironmentError(ret, err) - conffile = os.environ.get('CEPH_CONF', '') - clustername = os.environ.get('CEPH_CLUSTER_NAME', 'ceph') - clientname = os.environ.get('CEPH_NAME', DEFAULT_CLIENTNAME) - try: - err = '' - cfg = load_conf(clustername, conffile) - except Exception as e: - err = "Can't load Ceph conf file: " + str(e) - app.logger.critical(err) - app.logger.critical("CEPH_CONF: %s", conffile) - app.logger.critical("CEPH_CLUSTER_NAME: %s", clustername) - raise EnvironmentError(err) - - client_logfile = '/var/log/ceph' + clientname + '.log' - - glob.cluster = rados.Rados(name=clientname, conffile=conffile) - glob.cluster.connect() - - glob.baseurl = get_conf(cfg, clientname, 'base_url') or DEFAULT_BASEURL - if glob.baseurl.endswith('/'): - glob.baseurl - addr = get_conf(cfg, clientname, 'public_addr') or DEFAULT_ADDR - addrport = addr.rsplit(':', 1) - addr = addrport[0] - if len(addrport) > 1: - port = addrport[1] - else: - port = DEFAULT_ADDR.rsplit(':', 1) + try: + sigdict = parse_json_funcsigs(outbuf, 'rest') + except Exception as e: + err = "Can't parse command descriptions: {}".format(e) + app.logger.error(err) + raise EnvironmentError(err) + return sigdict + + app.ceph_cluster = cluster or 'ceph' + app.ceph_urls = {} + app.ceph_sigdict = {} + app.ceph_baseurl = '' + + conf = conf or '' + cluster = cluster or 'ceph' + clientid = clientid or DEFAULT_ID + clientname = clientname or 'client.' + clientid + + app.ceph_cluster = rados.Rados(name=clientname, conffile=conf) + app.ceph_cluster.conf_parse_argv(args) + app.ceph_cluster.connect() + + app.ceph_baseurl = app.ceph_cluster.conf_get('restapi_base_url') \ + or DEFAULT_BASEURL + if app.ceph_baseurl.endswith('/'): + app.ceph_baseurl = app.ceph_baseurl[:-1] + addr = app.ceph_cluster.conf_get('public_addr') or DEFAULT_ADDR + + # remove any nonce from the conf value + addr = addr.split('/')[0] + addr, port = addr.rsplit(':', 1) + addr = addr or DEFAULT_ADDR + port = port or DEFAULT_PORT port = int(port) - loglevel = get_conf(cfg, clientname, 'log_level') or DEFAULT_LOG_LEVEL - logfile = get_conf(cfg, clientname, 'log_file') or client_logfile + loglevel = app.ceph_cluster.conf_get('restapi_log_level') \ + or DEFAULT_LOG_LEVEL + logfile = app.ceph_cluster.conf_get('log_file') app.logger.addHandler(logging.handlers.WatchedFileHandler(logfile)) app.logger.setLevel(LOGLEVELS[loglevel.lower()]) for h in app.logger.handlers: h.setFormatter(logging.Formatter( '%(asctime)s %(name)s %(levelname)s: %(message)s')) - ret, outbuf, outs = json_command(glob.cluster, - prefix='get_command_descriptions') - if ret: - err = "Can't contact cluster for command descriptions: {0}".format(outs) - app.logger.error(err) - raise EnvironmentError(ret, err) - - try: - glob.sigdict = parse_json_funcsigs(outbuf, 'rest') - except Exception as e: - err = "Can't parse command descriptions: {}".format(e) - app.logger.error(err) - raise EnvironmentError(err) - - # glob.sigdict maps "cmdNNN" to a dict containing: + app.ceph_sigdict = get_command_descriptions(app.ceph_cluster) + + osdid = find_up_osd(app) + if osdid: + osd_sigdict = get_command_descriptions(app.ceph_cluster, + target=('osd', int(osdid))) + + # shift osd_sigdict keys up to fit at the end of the mon's app.ceph_sigdict + maxkey = sorted(app.ceph_sigdict.keys())[-1] + maxkey = int(maxkey.replace('cmd', '')) + osdkey = maxkey + 1 + for k, v in osd_sigdict.iteritems(): + newv = v + newv['flavor'] = 'tell' + globk = 'cmd' + str(osdkey) + app.ceph_sigdict[globk] = newv + osdkey += 1 + + # app.ceph_sigdict maps "cmdNNN" to a dict containing: # 'sig', an array of argdescs # 'help', the helptext # 'module', the Ceph module this command relates to @@ -167,94 +150,130 @@ def api_setup(): # a hint as to whether this is a GET or POST/PUT operation # 'avail', a comma-separated list of strings of consumers that should # display this command (filtered by parse_json_funcsigs() above) - glob.urls = {} - for cmdnum, cmddict in glob.sigdict.iteritems(): + app.ceph_urls = {} + for cmdnum, cmddict in app.ceph_sigdict.iteritems(): cmdsig = cmddict['sig'] - url, params = generate_url_and_params(cmdsig) - if url in glob.urls: - continue + flavor = cmddict.get('flavor', 'mon') + url, params = generate_url_and_params(app, cmdsig, flavor) + perm = cmddict['perm'] + for k in METHOD_DICT.iterkeys(): + if k in perm: + methods = METHOD_DICT[k] + urldict = {'paramsig':params, + 'help':cmddict['help'], + 'module':cmddict['module'], + 'perm':perm, + 'flavor':flavor, + 'methods':methods, + } + + # app.ceph_urls contains a list of urldicts (usually only one long) + if url not in app.ceph_urls: + app.ceph_urls[url] = [urldict] else: - perm = cmddict['perm'] - urldict = {'paramsig':params, - 'help':cmddict['help'], - 'module':cmddict['module'], - 'perm':perm, - } - method_dict = {'r':['GET'], - 'w':['PUT', 'DELETE']} - for k in method_dict.iterkeys(): - if k in perm: - methods = method_dict[k] - app.add_url_rule(url, url, handler, methods=methods) - glob.urls[url] = urldict - - url += '.<fmt>' - app.add_url_rule(url, url, handler, methods=methods) - glob.urls[url] = urldict - app.logger.debug("urls added: %d", len(glob.urls)) + # If more than one, need to make union of methods of all. + # Method must be checked in handler + methodset = set(methods) + for old_urldict in app.ceph_urls[url]: + methodset |= set(old_urldict['methods']) + methods = list(methodset) + app.ceph_urls[url].append(urldict) + + # add, or re-add, rule with all methods and urldicts + app.add_url_rule(url, url, handler, methods=methods) + url += '.<fmt>' + app.add_url_rule(url, url, handler, methods=methods) + + app.logger.debug("urls added: %d", len(app.ceph_urls)) app.add_url_rule('/<path:catchall_path>', '/<path:catchall_path>', handler, methods=['GET', 'PUT']) return addr, port -def generate_url_and_params(sig): - """ +def generate_url_and_params(app, sig, flavor): + ''' Digest command signature from cluster; generate an absolute - (including glob.baseurl) endpoint from all the prefix words, - and a dictionary of non-prefix parameters - """ + (including app.ceph_baseurl) endpoint from all the prefix words, + and a list of non-prefix param descs + ''' url = '' params = [] + # the OSD command descriptors don't include the 'tell <target>', so + # tack it onto the front of sig + if flavor == 'tell': + tellsig = parse_funcsig(['tell', + {'name':'target', 'type':'CephOsdName'}]) + sig = tellsig + sig + for desc in sig: + # prefixes go in the URL path if desc.t == CephPrefix: url += '/' + desc.instance.prefix + # CephChoices with 1 required string (not --) do too, unless + # we've already started collecting params, in which case they + # too are params elif desc.t == CephChoices and \ len(desc.instance.strings) == 1 and \ desc.req and \ - not str(desc.instance).startswith('--'): - url += '/' + str(desc.instance) + not str(desc.instance).startswith('--') and \ + not params: + url += '/' + str(desc.instance) else: - params.append(desc) - return glob.baseurl + url, params + # tell/<target> is a weird case; the URL includes what + # would everywhere else be a parameter + if flavor == 'tell' and \ + (desc.t, desc.name) == (CephOsdName, 'target'): + url += '/<target>' + else: + params.append(desc) + + return app.ceph_baseurl + url, params -def concise_sig_for_uri(sig): - """ +# +# end setup (import-time) functions, begin request-time functions +# + +def concise_sig_for_uri(sig, flavor): + ''' Return a generic description of how one would send a REST request for sig - """ + ''' prefix = [] args = [] + ret = '' + if flavor == 'tell': + ret = 'tell/<osdid-or-pgid>/' for d in sig: if d.t == CephPrefix: prefix.append(d.instance.prefix) else: args.append(d.name + '=' + str(d)) - sig = '/'.join(prefix) + ret += '/'.join(prefix) if args: - sig += '?' + '&'.join(args) - return sig + ret += '?' + '&'.join(args) + return ret def show_human_help(prefix): - """ + ''' Dump table showing commands matching prefix - """ - # XXX this really needs to be a template - #s = '<html><body><style>.colhalf { width: 50%;} body{word-wrap:break-word;}</style>' - #s += '<table border=1><col class=colhalf /><col class=colhalf />' - #s += '<th>Possible commands:</th>' - # XXX the above mucking with css doesn't cause sensible columns. + ''' + # XXX There ought to be a better discovery mechanism than an HTML table s = '<html><body><table border=1><th>Possible commands:</th><th>Method</th><th>Description</th>' - possible = [] permmap = {'r':'GET', 'rw':'PUT'} line = '' - for cmdsig in sorted(glob.sigdict.itervalues(), cmp=descsort): + for cmdsig in sorted(app.ceph_sigdict.itervalues(), cmp=descsort): concise = concise_sig(cmdsig['sig']) + flavor = cmdsig.get('flavor', 'mon') + if flavor == 'tell': + concise = 'tell/<target>/' + concise if concise.startswith(prefix): line = ['<tr><td>'] - wrapped_sig = textwrap.wrap(concise_sig_for_uri(cmdsig['sig']), 40) + wrapped_sig = textwrap.wrap( + concise_sig_for_uri(cmdsig['sig'], flavor), 40 + ) for sigline in wrapped_sig: line.append(flask.escape(sigline) + '\n') line.append('</td><td>') @@ -272,23 +291,22 @@ def show_human_help(prefix): @app.before_request def log_request(): - """ + ''' For every request, log it. XXX Probably overkill for production - """ + ''' app.logger.info(flask.request.url + " from " + flask.request.remote_addr + " " + flask.request.user_agent.string) app.logger.debug("Accept: %s", flask.request.accept_mimetypes.values()) - @app.route('/') def root_redir(): - return flask.redirect(glob.baseurl) + return flask.redirect(app.ceph_baseurl) def make_response(fmt, output, statusmsg, errorcode): - """ + ''' If formatted output, cobble up a response object that contains the output and status wrapped in enclosing objects; if nonformatted, just - use output. Return HTTP status errorcode in any event. - """ + use output+status. Return HTTP status errorcode in any event. + ''' response = output if fmt: if 'json' in fmt: @@ -300,6 +318,7 @@ def make_response(fmt, output, statusmsg, errorcode): return flask.make_response("Error decoding JSON from " + output, 500) elif 'xml' in fmt: + # XXX # one is tempted to do this with xml.etree, but figuring out how # to 'un-XML' the XML-dumped output so it can be reassembled into # a piece of the tree here is beyond me right now. @@ -319,22 +338,32 @@ def make_response(fmt, output, statusmsg, errorcode): {1} </status> </response>'''.format(response, xml.sax.saxutils.escape(statusmsg)) + else: + if not 200 <= errorcode < 300: + response = response + '\n' + statusmsg + '\n' return flask.make_response(response, errorcode) -def handler(catchall_path=None, fmt=None): - """ - Main endpoint handler; generic for every endpoint - """ +def handler(catchall_path=None, fmt=None, target=None): + ''' + Main endpoint handler; generic for every endpoint, including catchall. + Handles the catchall, anything with <.fmt>, anything with embedded + <target>. Partial match or ?help cause the HTML-table + "show_human_help" output. + ''' - if (catchall_path): - ep = catchall_path.replace('.<fmt>', '') - else: - ep = flask.request.endpoint.replace('.<fmt>', '') + ep = catchall_path or flask.request.endpoint + ep = ep.replace('.<fmt>', '') if ep[0] != '/': ep = '/' + ep + # demand that endpoint begin with app.ceph_baseurl + if not ep.startswith(app.ceph_baseurl): + return make_response(fmt, '', 'Page not found', 404) + + rel_ep = ep[len(app.ceph_baseurl)+1:] + # Extensions override Accept: headers override defaults if not fmt: if 'application/json' in flask.request.accept_mimetypes.values(): @@ -342,15 +371,38 @@ def handler(catchall_path=None, fmt=None): elif 'application/xml' in flask.request.accept_mimetypes.values(): fmt = 'xml' - # demand that endpoint begin with glob.baseurl - if not ep.startswith(glob.baseurl): - return make_response(fmt, '', 'Page not found', 404) + prefix = '' + pgid = None + cmdtarget = 'mon', '' - relative_endpoint = ep[len(glob.baseurl)+1:] - prefix = ' '.join(relative_endpoint.split('/')).strip() + if target: + # got tell/<target>; validate osdid or pgid + name = CephOsdName() + pgidobj = CephPgid() + try: + name.valid(target) + except ArgumentError: + # try pgid + try: + pgidobj.valid(target) + except ArgumentError: + return flask.make_response("invalid osdid or pgid", 400) + else: + # it's a pgid + pgid = pgidobj.val + cmdtarget = 'pg', pgid + else: + # it's an osd + cmdtarget = name.nametype, name.nameid + + # prefix does not include tell/<target>/ + prefix = ' '.join(rel_ep.split('/')[2:]).strip() + else: + # non-target command: prefix is entire path + prefix = ' '.join(rel_ep.split('/')).strip() # show "match as much as you gave me" help for unknown endpoints - if not ep in glob.urls: + if not ep in app.ceph_urls: helptext = show_human_help(prefix) if helptext: resp = flask.make_response(helptext, 400) @@ -359,43 +411,59 @@ def handler(catchall_path=None, fmt=None): else: return make_response(fmt, '', 'Invalid endpoint ' + ep, 400) - urldict = glob.urls[ep] - paramsig = urldict['paramsig'] - - # allow '?help' for any specifically-known endpoint - if 'help' in flask.request.args: - response = flask.make_response('{0}: {1}'.\ - format(prefix + concise_sig(paramsig), urldict['help'])) - response.headers['Content-Type'] = 'text/plain' - return response - - # if there are parameters for this endpoint, process them - if paramsig: - args = {} - for k, l in flask.request.args.iterlists(): - if len(l) == 1: - args[k] = l[0] - else: - args[k] = l - - # is this a valid set of params? - try: - argdict = validate(args, paramsig) - except Exception as e: - return make_response(fmt, '', str(e) + '\n', 400) - else: - # no parameters for this endpoint; complain if args are supplied - if flask.request.args: - return make_response(fmt, '', ep + 'takes no params', 400) - argdict = {} + found = None + exc = '' + for urldict in app.ceph_urls[ep]: + if flask.request.method not in urldict['methods']: + continue + paramsig = urldict['paramsig'] + + # allow '?help' for any specifically-known endpoint + if 'help' in flask.request.args: + response = flask.make_response('{0}: {1}'.\ + format(prefix + concise_sig(paramsig), urldict['help'])) + response.headers['Content-Type'] = 'text/plain' + return response + + # if there are parameters for this endpoint, process them + if paramsig: + args = {} + for k, l in flask.request.args.iterlists(): + if len(l) == 1: + args[k] = l[0] + else: + args[k] = l + + # is this a valid set of params? + try: + argdict = validate(args, paramsig) + found = urldict + break + except Exception as e: + exc += str(e) + continue + else: + if flask.request.args: + continue + found = urldict + argdict = {} + break + if not found: + return make_response(fmt, '', exc + '\n', 400) argdict['format'] = fmt or 'plain' - argdict['module'] = urldict['module'] - argdict['perm'] = urldict['perm'] + argdict['module'] = found['module'] + argdict['perm'] = found['perm'] + if pgid: + argdict['pgid'] = pgid + + if not cmdtarget: + cmdtarget = ('mon', '') app.logger.debug('sending command prefix %s argdict %s', prefix, argdict) - ret, outbuf, outs = json_command(glob.cluster, prefix=prefix, + ret, outbuf, outs = json_command(app.ceph_cluster, prefix=prefix, + target=cmdtarget, inbuf=flask.request.data, argdict=argdict) if ret: return make_response(fmt, '', 'Error: {0} ({1})'.format(outs, ret), 400) @@ -408,4 +476,12 @@ def handler(catchall_path=None, fmt=None): response.headers['Content-Type'] = contenttype return response -addr, port = api_setup() +# +# Main entry point from wrapper/WSGI server: call with cmdline args, +# get back the WSGI app entry point +# +def generate_app(conf, cluster, clientname, clientid, args): + addr, port = api_setup(app, conf, cluster, clientname, clientid, args) + app.ceph_addr = addr + app.ceph_port = port + return app diff --git a/src/pybind/cephfs.py b/src/pybind/cephfs.py index f89f53fb194..80b7e4b773f 100644 --- a/src/pybind/cephfs.py +++ b/src/pybind/cephfs.py @@ -2,9 +2,7 @@ This module is a thin wrapper around libcephfs. """ from ctypes import CDLL, c_char_p, c_size_t, c_void_p, c_int, c_long, c_uint, c_ulong, \ - create_string_buffer, byref, Structure, c_uint64, c_ubyte, pointer, \ - CFUNCTYPE -import ctypes + create_string_buffer, byref, Structure import errno class Error(Exception): diff --git a/src/pybind/rados.py b/src/pybind/rados.py index 13ce006f8ce..e543ff79305 100644 --- a/src/pybind/rados.py +++ b/src/pybind/rados.py @@ -278,7 +278,7 @@ Rados object in state %s." % (self.state)) ret = run_in_thread(self.librados.rados_conf_parse_argv_remainder, (self.cluster, len(args), cargs, cretargs)) if ret: - raise make_ex("error calling conf_parse_argv_remainder") + raise make_ex(ret, "error calling conf_parse_argv_remainder") # cretargs was allocated with fixed length; collapse return # list to eliminate any missing args diff --git a/src/pybind/rbd.py b/src/pybind/rbd.py index 9d71738e728..6e9ca8a2252 100644 --- a/src/pybind/rbd.py +++ b/src/pybind/rbd.py @@ -17,7 +17,7 @@ methods, a :class:`TypeError` will be raised. # Copyright 2011 Josh Durgin from ctypes import CDLL, c_char, c_char_p, c_size_t, c_void_p, c_int, \ create_string_buffer, byref, Structure, c_uint64, c_int64, c_uint8, \ - CFUNCTYPE, pointer + CFUNCTYPE import ctypes import errno diff --git a/src/rbd.cc b/src/rbd.cc index c9b2f0a272c..7f90c1f118e 100644 --- a/src/rbd.cc +++ b/src/rbd.cc @@ -60,8 +60,6 @@ #include <sys/param.h> #endif -#include "include/fiemap.h" - #define MAX_SECRET_LEN 1000 #define MAX_POOL_NAME_SIZE 128 diff --git a/src/rgw/rgw_admin.cc b/src/rgw/rgw_admin.cc index 67f5f1c68b3..364f60f78f7 100644 --- a/src/rgw/rgw_admin.cc +++ b/src/rgw/rgw_admin.cc @@ -1815,7 +1815,7 @@ next: cerr << "ERROR: failed to read input: " << cpp_strerror(-ret) << std::endl; return ret; } - ret = store->meta_mgr->put(metadata_key, bl); + ret = store->meta_mgr->put(metadata_key, bl, RGWMetadataHandler::APPLY_ALWAYS); if (ret < 0) { cerr << "ERROR: can't put key: " << cpp_strerror(-ret) << std::endl; return -ret; diff --git a/src/rgw/rgw_bucket.cc b/src/rgw/rgw_bucket.cc index 8de5a3d101f..bf8da99d616 100644 --- a/src/rgw/rgw_bucket.cc +++ b/src/rgw/rgw_bucket.cc @@ -1397,7 +1397,8 @@ public: return 0; } - int put(RGWRados *store, string& entry, RGWObjVersionTracker& objv_tracker, time_t mtime, JSONObj *obj) { + int put(RGWRados *store, string& entry, RGWObjVersionTracker& objv_tracker, + time_t mtime, JSONObj *obj, sync_type_t sync_type) { RGWBucketEntryPoint be, old_be; decode_json_obj(be, obj); @@ -1410,6 +1411,12 @@ public: if (ret < 0 && ret != -ENOENT) return ret; + // are we actually going to perform this put, or is it too old? + if (!check_versions(old_ot.read_version, orig_mtime, + objv_tracker.write_version, mtime, sync_type)) { + return STATUS_NO_APPLY; + } + objv_tracker.read_version = old_ot.read_version; /* maintain the obj version we just read */ ret = store->put_bucket_entrypoint_info(entry, be, false, objv_tracker, mtime, &attrs); @@ -1540,7 +1547,8 @@ public: return 0; } - int put(RGWRados *store, string& oid, RGWObjVersionTracker& objv_tracker, time_t mtime, JSONObj *obj) { + int put(RGWRados *store, string& oid, RGWObjVersionTracker& objv_tracker, + time_t mtime, JSONObj *obj, sync_type_t sync_type) { RGWBucketCompleteInfo bci, old_bci; decode_json_obj(bci, obj); @@ -1566,6 +1574,13 @@ public: bci.info.bucket.index_pool = old_bci.info.bucket.index_pool; } + // are we actually going to perform this put, or is it too old? + if (!check_versions(old_bci.info.objv_tracker.read_version, orig_mtime, + objv_tracker.write_version, mtime, sync_type)) { + objv_tracker.read_version = old_bci.info.objv_tracker.read_version; + return STATUS_NO_APPLY; + } + /* record the read version (if any), store the new version */ bci.info.objv_tracker.read_version = old_bci.info.objv_tracker.read_version; bci.info.objv_tracker.write_version = objv_tracker.write_version; @@ -1580,7 +1595,7 @@ public: if (ret < 0) return ret; - return 0; + return STATUS_APPLIED; } struct list_keys_info { diff --git a/src/rgw/rgw_common.h b/src/rgw/rgw_common.h index 7f224a798f5..543bdf21377 100644 --- a/src/rgw/rgw_common.h +++ b/src/rgw/rgw_common.h @@ -99,6 +99,8 @@ using ceph::crypto::MD5; #define STATUS_NO_CONTENT 1902 #define STATUS_PARTIAL_CONTENT 1903 #define STATUS_REDIRECT 1904 +#define STATUS_NO_APPLY 1905 +#define STATUS_APPLIED 1906 #define ERR_INVALID_BUCKET_NAME 2000 #define ERR_INVALID_OBJECT_NAME 2001 diff --git a/src/rgw/rgw_formats.h b/src/rgw/rgw_formats.h index 0ae917fe7d1..3653ef4f480 100644 --- a/src/rgw/rgw_formats.h +++ b/src/rgw/rgw_formats.h @@ -36,6 +36,9 @@ public: virtual void dump_string(const char *name, std::string s); virtual std::ostream& dump_stream(const char *name); virtual void dump_format(const char *name, const char *fmt, ...); + virtual void dump_format_unquoted(const char *name, const char *fmt, ...) { + assert(0 == "not implemented"); + } virtual int get_len() const; virtual void write_raw_data(const char *data); diff --git a/src/rgw/rgw_metadata.cc b/src/rgw/rgw_metadata.cc index ac8c703f5e0..6da1ff5ab24 100644 --- a/src/rgw/rgw_metadata.cc +++ b/src/rgw/rgw_metadata.cc @@ -194,7 +194,8 @@ public: virtual string get_type() { return string(); } virtual int get(RGWRados *store, string& entry, RGWMetadataObject **obj) { return -ENOTSUP; } - virtual int put(RGWRados *store, string& entry, RGWObjVersionTracker& objv_tracker, time_t mtime, JSONObj *obj) { return -ENOTSUP; } + virtual int put(RGWRados *store, string& entry, RGWObjVersionTracker& objv_tracker, + time_t mtime, JSONObj *obj, sync_type_t sync_type) { return -ENOTSUP; } virtual void get_pool_and_oid(RGWRados *store, const string& key, rgw_bucket& bucket, string& oid) {} @@ -242,6 +243,7 @@ RGWMetadataManager::~RGWMetadataManager() } handlers.clear(); + delete md_log; } int RGWMetadataManager::register_handler(RGWMetadataHandler *handler) @@ -328,7 +330,9 @@ int RGWMetadataManager::get(string& metadata_key, Formatter *f) return 0; } -int RGWMetadataManager::put(string& metadata_key, bufferlist& bl) +int RGWMetadataManager::put(string& metadata_key, bufferlist& bl, + RGWMetadataHandler::sync_type_t sync_type, + obj_version *existing_version) { RGWMetadataHandler *handler; string entry; @@ -357,7 +361,11 @@ int RGWMetadataManager::put(string& metadata_key, bufferlist& bl) return -EINVAL; } - return handler->put(store, entry, objv_tracker, mtime, jo); + ret = handler->put(store, entry, objv_tracker, mtime, jo, sync_type); + if (existing_version) { + *existing_version = objv_tracker.read_version; + } + return ret; } int RGWMetadataManager::remove(string& metadata_key) @@ -505,15 +513,15 @@ int RGWMetadataManager::pre_modify(RGWMetadataHandler *handler, string& section, /* if write version has not been set, and there's a read version, set it so that we can * log it */ - if (objv_tracker && objv_tracker->read_version.ver && - !objv_tracker->write_version.ver) { - objv_tracker->write_version = objv_tracker->read_version; - objv_tracker->write_version.ver++; + if (objv_tracker) { + if (objv_tracker->read_version.ver && !objv_tracker->write_version.ver) { + objv_tracker->write_version = objv_tracker->read_version; + objv_tracker->write_version.ver++; + } + log_data.read_version = objv_tracker->read_version; + log_data.write_version = objv_tracker->write_version; } - log_data.read_version = objv_tracker->read_version; - log_data.write_version = objv_tracker->write_version; - log_data.status = op_type; bufferlist logbl; diff --git a/src/rgw/rgw_metadata.h b/src/rgw/rgw_metadata.h index 2cc9110191a..50649b6f901 100644 --- a/src/rgw/rgw_metadata.h +++ b/src/rgw/rgw_metadata.h @@ -44,14 +44,30 @@ class RGWMetadataManager; class RGWMetadataHandler { friend class RGWMetadataManager; -protected: - virtual void get_pool_and_oid(RGWRados *store, const string& key, rgw_bucket& bucket, string& oid) = 0; public: + enum sync_type_t { + APPLY_ALWAYS, + APPLY_UPDATES, + APPLY_NEWER + }; + static bool string_to_sync_type(const string& sync_string, + sync_type_t& type) { + if (sync_string.compare("update-by-version") == 0) + type = APPLY_UPDATES; + else if (sync_string.compare("update-by-timestamp") == 0) + type = APPLY_NEWER; + else if (sync_string.compare("always") == 0) + type = APPLY_ALWAYS; + else + return false; + return true; + } virtual ~RGWMetadataHandler() {} virtual string get_type() = 0; virtual int get(RGWRados *store, string& entry, RGWMetadataObject **obj) = 0; - virtual int put(RGWRados *store, string& entry, RGWObjVersionTracker& objv_tracker, time_t mtime, JSONObj *obj) = 0; + virtual int put(RGWRados *store, string& entry, RGWObjVersionTracker& objv_tracker, + time_t mtime, JSONObj *obj, sync_type_t type) = 0; virtual int remove(RGWRados *store, string& entry, RGWObjVersionTracker& objv_tracker) = 0; virtual int list_keys_init(RGWRados *store, void **phandle) = 0; @@ -62,6 +78,33 @@ public: virtual void get_hash_key(const string& section, const string& key, string& hash_key) { hash_key = section + ":" + key; } + +protected: + virtual void get_pool_and_oid(RGWRados *store, const string& key, rgw_bucket& bucket, string& oid) = 0; + /** + * Compare an incoming versus on-disk tag/version+mtime combo against + * the sync mode to see if the new one should replace the on-disk one. + * + * @return true if the update should proceed, false otherwise. + */ + bool check_versions(const obj_version& ondisk, const time_t& ondisk_time, + const obj_version& incoming, const time_t& incoming_time, + sync_type_t sync_mode) { + switch (sync_mode) { + case APPLY_UPDATES: + if ((ondisk.tag != incoming.tag) || + (ondisk.ver >= incoming.ver)) + return false; + break; + case APPLY_NEWER: + if (ondisk_time >= incoming_time) + return false; + break; + case APPLY_ALWAYS: //deliberate fall-thru -- we always apply! + default: break; + } + return true; + } }; #define META_LOG_OBJ_PREFIX "meta.log." @@ -86,9 +129,7 @@ class RGWMetadataLog { } public: - RGWMetadataLog(CephContext *_cct, RGWRados *_store) : cct(_cct), store(_store) { - prefix = META_LOG_OBJ_PREFIX; - } + RGWMetadataLog(CephContext *_cct, RGWRados *_store) : cct(_cct), store(_store), prefix(META_LOG_OBJ_PREFIX) {} int add_entry(RGWRados *store, RGWMetadataHandler *handler, const string& section, const string& key, bufferlist& bl); @@ -102,7 +143,7 @@ public: bool done; - LogListCtx() : done(false) {} + LogListCtx() : cur_shard(0), done(false) {} }; void init_list_entries(int shard_id, utime_t& from_time, utime_t& end_time, string& marker, void **handle); @@ -152,7 +193,9 @@ public: map<string, bufferlist>* rmattrs, RGWObjVersionTracker *objv_tracker); int get(string& metadata_key, Formatter *f); - int put(string& metadata_key, bufferlist& bl); + int put(string& metadata_key, bufferlist& bl, + RGWMetadataHandler::sync_type_t sync_mode, + obj_version *existing_version = NULL); int remove(string& metadata_key); int list_keys_init(string& section, void **phandle); diff --git a/src/rgw/rgw_rest_log.cc b/src/rgw/rgw_rest_log.cc index 544adbe7965..7b50986cb37 100644 --- a/src/rgw/rgw_rest_log.cc +++ b/src/rgw/rgw_rest_log.cc @@ -88,6 +88,8 @@ void RGWOp_MDLog_List::execute() { if (!max_entries_str.empty()) max_entries -= entries.size(); } while (truncated && (max_entries > 0)); + + meta_log->complete_list_entries(handle); } void RGWOp_MDLog_List::send_response() { diff --git a/src/rgw/rgw_rest_metadata.cc b/src/rgw/rgw_rest_metadata.cc index 35ec0ab9b04..0705a46ed6c 100644 --- a/src/rgw/rgw_rest_metadata.cc +++ b/src/rgw/rgw_rest_metadata.cc @@ -161,12 +161,43 @@ void RGWOp_Metadata_Put::execute() { frame_metadata_key(s, metadata_key); - http_ret = store->meta_mgr->put(metadata_key, bl); + RGWMetadataHandler::sync_type_t sync_type = RGWMetadataHandler::APPLY_ALWAYS; + + bool mode_exists = false; + string mode_string = s->info.args.get("sync-type", &mode_exists); + if (mode_exists) { + bool parsed = RGWMetadataHandler::string_to_sync_type(mode_string, + sync_type); + if (!parsed) { + http_ret = -EINVAL; + return; + } + } + + http_ret = store->meta_mgr->put(metadata_key, bl, sync_type, &ondisk_version); if (http_ret < 0) { dout(5) << "ERROR: can't put key: " << cpp_strerror(http_ret) << dendl; return; } - http_ret = 0; + // translate internal codes into return header + if (http_ret == STATUS_NO_APPLY) + update_status = "skipped"; + else if (http_ret == STATUS_APPLIED) + update_status = "applied"; +} + +void RGWOp_Metadata_Put::send_response() { + int http_return_code = http_ret; + if ((http_ret == STATUS_NO_APPLY) || (http_ret == STATUS_APPLIED)) + http_return_code = STATUS_NO_CONTENT; + set_req_state_err(s, http_return_code); + dump_errno(s); + stringstream ver_stream; + ver_stream << "ver:" << ondisk_version.ver + <<",tag:" << ondisk_version.tag; + dump_pair(s, "RGWX_UPDATE_STATUS", update_status.c_str()); + dump_pair(s, "RGWX_UPDATE_VERSION", ver_stream.str().c_str()); + end_header(s); } void RGWOp_Metadata_Delete::execute() { diff --git a/src/rgw/rgw_rest_metadata.h b/src/rgw/rgw_rest_metadata.h index 59d7c5f7045..7f3cf1f2207 100644 --- a/src/rgw/rgw_rest_metadata.h +++ b/src/rgw/rgw_rest_metadata.h @@ -40,6 +40,8 @@ public: class RGWOp_Metadata_Put : public RGWRESTOp { int get_data(bufferlist& bl); + string update_status; + obj_version ondisk_version; public: RGWOp_Metadata_Put() {} ~RGWOp_Metadata_Put() {} @@ -48,6 +50,7 @@ public: return caps.check_cap("metadata", RGW_CAP_WRITE); } void execute(); + void send_response(); virtual const string name() { return "set_metadata"; } }; diff --git a/src/rgw/rgw_user.cc b/src/rgw/rgw_user.cc index 6cdcaa62935..6fcecd4a98d 100644 --- a/src/rgw/rgw_user.cc +++ b/src/rgw/rgw_user.cc @@ -2295,22 +2295,29 @@ public: return 0; } - int put(RGWRados *store, string& entry, RGWObjVersionTracker& objv_tracker, time_t mtime, JSONObj *obj) { + int put(RGWRados *store, string& entry, RGWObjVersionTracker& objv_tracker, + time_t mtime, JSONObj *obj, sync_type_t sync_mode) { RGWUserInfo info; decode_json_obj(info, obj); RGWUserInfo old_info; - int ret = rgw_get_user_info_by_uid(store, entry, old_info, &objv_tracker); + time_t orig_mtime; + int ret = rgw_get_user_info_by_uid(store, entry, old_info, &objv_tracker, &orig_mtime); if (ret < 0 && ret != -ENOENT) return ret; + // are we actually going to perform this put, or is it too old? + if (!check_versions(objv_tracker.read_version, orig_mtime, + objv_tracker.write_version, mtime, sync_mode)) { + return STATUS_NO_APPLY; + } ret = rgw_store_user_info(store, info, &old_info, &objv_tracker, mtime, false); if (ret < 0) return ret; - return 0; + return STATUS_APPLIED; } struct list_keys_info { diff --git a/src/test/ObjectMap/test_store_tool/test_store_tool.cc b/src/test/ObjectMap/test_store_tool/test_store_tool.cc index 4ab62892880..ace91220df6 100644 --- a/src/test/ObjectMap/test_store_tool/test_store_tool.cc +++ b/src/test/ObjectMap/test_store_tool/test_store_tool.cc @@ -19,6 +19,12 @@ #include "os/LevelDBStore.h" +#include "common/ceph_argparse.h" +#include "global/global_init.h" +#include "common/errno.h" +#include "common/safe_io.h" +#include "common/config.h" + using namespace std; class StoreTool @@ -98,15 +104,27 @@ void usage(const char *pname) << std::endl; } -int main(int argc, char *argv[]) +int main(int argc, const char *argv[]) { - if (argc < 3) { + vector<const char*> args; + argv_to_vec(argc, argv, args); + env_to_vec(args); + + global_init( + NULL, args, + CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); + common_init_finish(g_ceph_context); + + + if (args.size() < 2) { usage(argv[0]); return 1; } - string path(argv[1]); - string cmd(argv[2]); + string path(args[0]); + string cmd(args[1]); + + std::cout << "path: " << path << " cmd " << cmd << std::endl; StoreTool st(path); diff --git a/src/test/admin_socket.cc b/src/test/admin_socket.cc index 0dbcb6d2f75..8f67918e644 100644 --- a/src/test/admin_socket.cc +++ b/src/test/admin_socket.cc @@ -64,13 +64,13 @@ TEST(AdminSocket, SendNoOp) { ASSERT_EQ(true, asoct.init(get_rand_socket_path())); AdminSocketClient client(get_rand_socket_path()); string version; - ASSERT_EQ("", client.do_request("0", &version)); + ASSERT_EQ("", client.do_request("{\"prefix\":\"0\"}", &version)); ASSERT_EQ(CEPH_ADMIN_SOCK_VERSION, version); ASSERT_EQ(true, asoct.shutdown()); } class MyTest : public AdminSocketHook { - bool call(std::string command, std::string args, bufferlist& result) { + bool call(std::string command, std::string args, std::string format, bufferlist& result) { result.append(command); result.append("|"); result.append(args); @@ -87,13 +87,13 @@ TEST(AdminSocket, RegisterCommand) { AdminSocketClient client(get_rand_socket_path()); ASSERT_EQ(0, asoct.m_asokc->register_command("test", "test", new MyTest(), "")); string result; - ASSERT_EQ("", client.do_request("test", &result)); + ASSERT_EQ("", client.do_request("{\"prefix\":\"test\"}", &result)); ASSERT_EQ("test|", result); ASSERT_EQ(true, asoct.shutdown()); } class MyTest2 : public AdminSocketHook { - bool call(std::string command, std::string args, bufferlist& result) { + bool call(std::string command, std::string args, std::string format, bufferlist& result) { result.append(command); result.append("|"); result.append(args); @@ -111,20 +111,20 @@ TEST(AdminSocket, RegisterCommandPrefixes) { ASSERT_EQ(0, asoct.m_asokc->register_command("test", "test", new MyTest(), "")); ASSERT_EQ(0, asoct.m_asokc->register_command("test command", "test command", new MyTest2(), "")); string result; - ASSERT_EQ("", client.do_request("test", &result)); + ASSERT_EQ("", client.do_request("{\"prefix\":\"test\"}", &result)); ASSERT_EQ("test|", result); - ASSERT_EQ("", client.do_request("test command", &result)); + ASSERT_EQ("", client.do_request("{\"prefix\":\"test command\"}", &result)); ASSERT_EQ("test command|", result); - ASSERT_EQ("", client.do_request("test command post", &result)); + ASSERT_EQ("", client.do_request("{\"prefix\":\"test command post\"}", &result)); ASSERT_EQ("test command|post", result); - ASSERT_EQ("", client.do_request("test command post", &result)); + ASSERT_EQ("", client.do_request("{\"prefix\":\"test command post\"}", &result)); ASSERT_EQ("test command| post", result); - ASSERT_EQ("", client.do_request("test this thing", &result)); + ASSERT_EQ("", client.do_request("{\"prefix\":\"test this thing\"}", &result)); ASSERT_EQ("test|this thing", result); - ASSERT_EQ("", client.do_request("test command post", &result)); + ASSERT_EQ("", client.do_request("{\"prefix\":\"test command post\"}", &result)); ASSERT_EQ("test| command post", result); - ASSERT_EQ("", client.do_request("test this thing", &result)); + ASSERT_EQ("", client.do_request("{\"prefix\":\"test this thing\"}", &result)); ASSERT_EQ("test| this thing", result); ASSERT_EQ(true, asoct.shutdown()); } diff --git a/src/test/bench/small_io_bench_fs.cc b/src/test/bench/small_io_bench_fs.cc index 61fbacc5570..a37a7e71153 100644 --- a/src/test/bench/small_io_bench_fs.cc +++ b/src/test/bench/small_io_bench_fs.cc @@ -32,7 +32,10 @@ struct MorePrinting : public DetailedStatCollector::AdditionalPrinting { MorePrinting(CephContext *cct) : cct(cct) {} void operator()(std::ostream *out) { bufferlist bl; - cct->get_perfcounters_collection()->write_json_to_buf(bl, 0); + Formatter *f = new_formatter("json-pretty"); + cct->get_perfcounters_collection()->dump_formatted(f, 0); + f->flush(bl); + delete f; bl.append('\0'); *out << bl.c_str() << std::endl; } diff --git a/src/test/cli/ceph-authtool/help.t b/src/test/cli/ceph-authtool/help.t index 7434ec4a7c0..062c967a154 100644 --- a/src/test/cli/ceph-authtool/help.t +++ b/src/test/cli/ceph-authtool/help.t @@ -10,9 +10,15 @@ 'mount -o secret=..' argument -C, --create-keyring will create a new keyring, overwriting any existing keyringfile - --gen-key will generate a new secret key for the + -g, --gen-key will generate a new secret key for the specified entityname - --add-key will add an encoded key to the keyring + --gen-print-key will generate a new secret key without set it + to the keyringfile, prints the secret to stdout + --import-keyring will import the content of a given keyring + into the keyringfile + -u, --set-uid sets the auid (authenticated user id) for the + specified entityname + -a, --add-key will add an encoded key to the keyring --cap subsystem capability will set the capability for given subsystem --caps capsfile will set all of capabilities associated with a given key, for all subsystems diff --git a/src/test/cli/ceph-authtool/manpage.t b/src/test/cli/ceph-authtool/manpage.t index 69e15fa51c3..a9e1408d716 100644 --- a/src/test/cli/ceph-authtool/manpage.t +++ b/src/test/cli/ceph-authtool/manpage.t @@ -9,9 +9,15 @@ 'mount -o secret=..' argument -C, --create-keyring will create a new keyring, overwriting any existing keyringfile - --gen-key will generate a new secret key for the + -g, --gen-key will generate a new secret key for the specified entityname - --add-key will add an encoded key to the keyring + --gen-print-key will generate a new secret key without set it + to the keyringfile, prints the secret to stdout + --import-keyring will import the content of a given keyring + into the keyringfile + -u, --set-uid sets the auid (authenticated user id) for the + specified entityname + -a, --add-key will add an encoded key to the keyring --cap subsystem capability will set the capability for given subsystem --caps capsfile will set all of capabilities associated with a given key, for all subsystems diff --git a/src/test/cli/ceph-authtool/simple.t b/src/test/cli/ceph-authtool/simple.t index 69d027c1342..b86476a5ad0 100644 --- a/src/test/cli/ceph-authtool/simple.t +++ b/src/test/cli/ceph-authtool/simple.t @@ -9,9 +9,15 @@ 'mount -o secret=..' argument -C, --create-keyring will create a new keyring, overwriting any existing keyringfile - --gen-key will generate a new secret key for the + -g, --gen-key will generate a new secret key for the specified entityname - --add-key will add an encoded key to the keyring + --gen-print-key will generate a new secret key without set it + to the keyringfile, prints the secret to stdout + --import-keyring will import the content of a given keyring + into the keyringfile + -u, --set-uid sets the auid (authenticated user id) for the + specified entityname + -a, --add-key will add an encoded key to the keyring --cap subsystem capability will set the capability for given subsystem --caps capsfile will set all of capabilities associated with a given key, for all subsystems diff --git a/src/test/cls_log/test_cls_log.cc b/src/test/cls_log/test_cls_log.cc index a8d1b3d5300..ce97025f819 100644 --- a/src/test/cls_log/test_cls_log.cc +++ b/src/test/cls_log/test_cls_log.cc @@ -118,14 +118,10 @@ TEST(cls_rgw, test_log_add_same_time) /* add chains */ string oid = "obj"; - /* create object */ - ASSERT_EQ(0, ioctx.create(oid, true)); - /* generate log */ - utime_t start_time = ceph_clock_now(g_ceph_context); generate_log(ioctx, oid, 10, start_time, false); @@ -206,14 +202,10 @@ TEST(cls_rgw, test_log_add_different_time) /* add chains */ string oid = "obj"; - /* create object */ - ASSERT_EQ(0, ioctx.create(oid, true)); - /* generate log */ - utime_t start_time = ceph_clock_now(g_ceph_context); generate_log(ioctx, oid, 10, start_time, true); @@ -227,7 +219,6 @@ TEST(cls_rgw, test_log_add_different_time) string marker; /* check list */ - cls_log_list(*rop, start_time, to_time, marker, 0, entries, &marker, &truncated); bufferlist obl; @@ -258,7 +249,6 @@ TEST(cls_rgw, test_log_add_different_time) reset_rop(&rop); /* check list again with shifted time */ - utime_t next_time = get_time(start_time, 1, true); marker.clear(); @@ -289,7 +279,7 @@ TEST(cls_rgw, test_log_add_different_time) } while (truncated); ASSERT_EQ(10, i); - + delete rop; } TEST(cls_rgw, test_log_trim) @@ -305,14 +295,10 @@ TEST(cls_rgw, test_log_trim) /* add chains */ string oid = "obj"; - /* create object */ - ASSERT_EQ(0, ioctx.create(oid, true)); - /* generate log */ - utime_t start_time = ceph_clock_now(g_ceph_context); generate_log(ioctx, oid, 10, start_time, true); @@ -344,4 +330,5 @@ TEST(cls_rgw, test_log_trim) ASSERT_EQ(9 - i, (int)entries.size()); ASSERT_EQ(0, (int)truncated); } + delete rop; } diff --git a/src/test/cls_replica_log/test_cls_replica_log.cc b/src/test/cls_replica_log/test_cls_replica_log.cc index eabe0b3860d..8c204caef04 100644 --- a/src/test/cls_replica_log/test_cls_replica_log.cc +++ b/src/test/cls_replica_log/test_cls_replica_log.cc @@ -14,34 +14,42 @@ #include "cls/replica_log/cls_replica_log_client.h" #include "cls/replica_log/cls_replica_log_types.h" -#define SETUP_DATA \ - librados::Rados rados; \ - librados::IoCtx ioctx; \ - string pool_name = get_temp_pool_name(); \ - ASSERT_EQ("", create_one_pool_pp(pool_name, rados)); \ - ASSERT_EQ(0, rados.ioctx_create(pool_name.c_str(), ioctx)); \ - string oid = "obj"; \ - ASSERT_EQ(0, ioctx.create(oid, true)); - -#define ADD_MARKER \ - string entity = "tester_entity"; \ - string marker = "tester_marker1"; \ - utime_t time; \ - time.set_from_double(10); \ - list<pair<string, utime_t> > entries; \ - entries.push_back(make_pair("tester_obj1", time)); \ - time.set_from_double(20); \ - cls_replica_log_progress_marker progress; \ - cls_replica_log_prepare_marker(progress, entity, marker, time, &entries); \ - librados::ObjectWriteOperation opw; \ - cls_replica_log_update_bound(opw, progress); \ - ASSERT_EQ(0, ioctx.operate(oid, &opw)); - -TEST(cls_replica_log, test_set_get_marker) +class cls_replica_log_Test : public ::testing::Test { +public: + librados::Rados rados; + librados::IoCtx ioctx; + string pool_name; + string oid; + string entity; + string marker; + utime_t time; + list<pair<string, utime_t> > entries; + cls_replica_log_progress_marker progress; + + void SetUp() { + pool_name = get_temp_pool_name(); + ASSERT_EQ("", create_one_pool_pp(pool_name, rados)); + ASSERT_EQ(0, rados.ioctx_create(pool_name.c_str(), ioctx)); + oid = "obj"; + ASSERT_EQ(0, ioctx.create(oid, true)); + } + + void add_marker() { + entity = "tester_entity"; + marker = "tester_marker1"; + time.set_from_double(10); + entries.push_back(make_pair("tester_obj1", time)); + time.set_from_double(20); + cls_replica_log_prepare_marker(progress, entity, marker, time, &entries); + librados::ObjectWriteOperation opw; + cls_replica_log_update_bound(opw, progress); + ASSERT_EQ(0, ioctx.operate(oid, &opw)); + } +}; + +TEST_F(cls_replica_log_Test, test_set_get_marker) { - SETUP_DATA - - ADD_MARKER + add_marker(); string reply_position_marker; utime_t reply_time; @@ -66,11 +74,9 @@ TEST(cls_replica_log, test_set_get_marker) ASSERT_EQ("tester_obj1", response_item_list.front().first); } -TEST(cls_replica_log, test_bad_update) +TEST_F(cls_replica_log_Test, test_bad_update) { - SETUP_DATA - - ADD_MARKER + add_marker(); time.set_from_double(15); cls_replica_log_progress_marker bad_marker; @@ -80,22 +86,18 @@ TEST(cls_replica_log, test_bad_update) ASSERT_EQ(-EINVAL, ioctx.operate(oid, &badw)); } -TEST(cls_replica_log, test_bad_delete) +TEST_F(cls_replica_log_Test, test_bad_delete) { - SETUP_DATA - - ADD_MARKER + add_marker(); librados::ObjectWriteOperation badd; cls_replica_log_delete_bound(badd, entity); ASSERT_EQ(-ENOTEMPTY, ioctx.operate(oid, &badd)); } -TEST(cls_replica_log, test_good_delete) +TEST_F(cls_replica_log_Test, test_good_delete) { - SETUP_DATA - - ADD_MARKER + add_marker(); librados::ObjectWriteOperation opc; progress.items.clear(); @@ -113,10 +115,8 @@ TEST(cls_replica_log, test_good_delete) ASSERT_EQ((unsigned)0, return_progress_list.size()); } -TEST(cls_replica_log, test_bad_get) +TEST_F(cls_replica_log_Test, test_bad_get) { - SETUP_DATA - string reply_position_marker; utime_t reply_time; list<cls_replica_log_progress_marker> return_progress_list; @@ -125,11 +125,9 @@ TEST(cls_replica_log, test_bad_get) reply_time, return_progress_list)); } -TEST(cls_replica_log, test_double_delete) +TEST_F(cls_replica_log_Test, test_double_delete) { - SETUP_DATA - - ADD_MARKER + add_marker(); librados::ObjectWriteOperation opc; progress.items.clear(); diff --git a/src/test/cls_statelog/test_cls_statelog.cc b/src/test/cls_statelog/test_cls_statelog.cc index 294b528f5db..a1b4cc34efc 100644 --- a/src/test/cls_statelog/test_cls_statelog.cc +++ b/src/test/cls_statelog/test_cls_statelog.cc @@ -70,6 +70,7 @@ static void get_entries_by_object(librados::IoCtx& ioctx, string& oid, cls_statelog_list(*rop, empty_str, op_id, object, marker, 0, entries, &marker, &truncated); ASSERT_EQ(0, ioctx.operate(oid, rop, &obl)); ASSERT_EQ(expected, (int)entries.size()); + delete rop; } static void get_entries_by_client_id(librados::IoCtx& ioctx, string& oid, @@ -84,6 +85,7 @@ static void get_entries_by_client_id(librados::IoCtx& ioctx, string& oid, cls_statelog_list(*rop, client_id, op_id, empty_str, marker, 0, entries, &marker, &truncated); ASSERT_EQ(0, ioctx.operate(oid, rop, &obl)); ASSERT_EQ(expected, (int)entries.size()); + delete rop; } static void get_all_entries(librados::IoCtx& ioctx, string& oid, list<cls_statelog_entry>& entries, int expected) @@ -203,5 +205,7 @@ TEST(cls_rgw, test_statelog_basic) string empty_str; get_entries_by_client_id(ioctx, oid, entries, e.client_id, empty_str, 4); get_entries_by_object(ioctx, oid, entries, e.object, empty_str, 1); + delete op; + delete rop; } diff --git a/src/test/cls_version/test_cls_version.cc b/src/test/cls_version/test_cls_version.cc index caa0a36cd74..4c2d59500df 100644 --- a/src/test/cls_version/test_cls_version.cc +++ b/src/test/cls_version/test_cls_version.cc @@ -55,6 +55,7 @@ TEST(cls_rgw, test_version_inc_read) ASSERT_NE(0, (int)ver.tag.size()); /* inc version again! */ + delete op; op = new_op(); cls_version_inc(*op); ASSERT_EQ(0, ioctx.operate(oid, op)); @@ -136,7 +137,6 @@ TEST(cls_rgw, test_version_inc_cond) /* add chains */ string oid = "obj"; - /* create object */ ASSERT_EQ(0, ioctx.create(oid, true)); @@ -160,6 +160,7 @@ TEST(cls_rgw, test_version_inc_cond) /* inc version again! */ + delete op; op = new_op(); cls_version_inc(*op); ASSERT_EQ(0, ioctx.operate(oid, op)); @@ -180,18 +181,22 @@ TEST(cls_rgw, test_version_inc_cond) ASSERT_EQ(0, (int)ver2.tag.compare(ver.tag)); /* a bunch of conditions that should fail */ + delete op; op = new_op(); cls_version_inc(*op, cond_ver, VER_COND_EQ); ASSERT_EQ(-ECANCELED, ioctx.operate(oid, op)); + delete op; op = new_op(); cls_version_inc(*op, cond_ver, VER_COND_LT); ASSERT_EQ(-ECANCELED, ioctx.operate(oid, op)); + delete op; op = new_op(); cls_version_inc(*op, cond_ver, VER_COND_LE); ASSERT_EQ(-ECANCELED, ioctx.operate(oid, op)); + delete op; op = new_op(); cls_version_inc(*op, cond_ver, VER_COND_TAG_NE); ASSERT_EQ(-ECANCELED, ioctx.operate(oid, op)); @@ -201,18 +206,22 @@ TEST(cls_rgw, test_version_inc_cond) ASSERT_EQ(0, (int)ver2.tag.compare(ver.tag)); /* a bunch of conditions that should succeed */ + delete op; op = new_op(); cls_version_inc(*op, ver2, VER_COND_EQ); ASSERT_EQ(0, ioctx.operate(oid, op)); + delete op; op = new_op(); cls_version_inc(*op, cond_ver, VER_COND_GT); ASSERT_EQ(0, ioctx.operate(oid, op)); + delete op; op = new_op(); cls_version_inc(*op, cond_ver, VER_COND_GE); ASSERT_EQ(0, ioctx.operate(oid, op)); + delete op; op = new_op(); cls_version_inc(*op, cond_ver, VER_COND_TAG_EQ); ASSERT_EQ(0, ioctx.operate(oid, op)); @@ -261,20 +270,24 @@ TEST(cls_rgw, test_version_inc_check) bufferlist bl; ASSERT_EQ(0, ioctx.operate(oid, rop, &bl)); + delete rop; rop = new_rop(); cls_version_check(*rop, cond_ver, VER_COND_GE); ASSERT_EQ(0, ioctx.operate(oid, rop, &bl)); + delete rop; rop = new_rop(); cls_version_check(*rop, cond_ver, VER_COND_LE); ASSERT_EQ(0, ioctx.operate(oid, rop, &bl)); + delete rop; rop = new_rop(); cls_version_check(*rop, cond_ver, VER_COND_TAG_EQ); ASSERT_EQ(0, ioctx.operate(oid, rop, &bl)); obj_version ver2; + delete op; op = new_op(); cls_version_inc(*op); ASSERT_EQ(0, ioctx.operate(oid, op)); @@ -283,16 +296,23 @@ TEST(cls_rgw, test_version_inc_check) ASSERT_GT((long long)ver2.ver, (long long)ver.ver); ASSERT_EQ(0, (int)ver2.tag.compare(ver.tag)); + delete op; + /* a bunch of conditions that should fail */ + delete rop; rop = new_rop(); cls_version_check(*rop, ver, VER_COND_LT); ASSERT_EQ(-ECANCELED, ioctx.operate(oid, rop, &bl)); + delete rop; rop = new_rop(); cls_version_check(*rop, cond_ver, VER_COND_LE); ASSERT_EQ(-ECANCELED, ioctx.operate(oid, rop, &bl)); + delete rop; rop = new_rop(); cls_version_check(*rop, cond_ver, VER_COND_TAG_NE); ASSERT_EQ(-ECANCELED, ioctx.operate(oid, rop, &bl)); + + delete rop; } diff --git a/src/test/common/test_sharedptr_registry.cc b/src/test/common/test_sharedptr_registry.cc new file mode 100644 index 00000000000..aec2107c9e5 --- /dev/null +++ b/src/test/common/test_sharedptr_registry.cc @@ -0,0 +1,289 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2013 Cloudwatt <libre.licensing@cloudwatt.com> + * + * Author: Loic Dachary <loic@dachary.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Library Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Library Public License for more details. + * + */ + +#include <stdio.h> +#include <signal.h> +#include "common/Thread.h" +#include "common/sharedptr_registry.hpp" +#include "common/ceph_argparse.h" +#include "global/global_init.h" +#include <gtest/gtest.h> + +using namespace std::tr1; + +class SharedPtrRegistryTest : public SharedPtrRegistry<unsigned int, int> { +public: + Mutex &get_lock() { return lock; } + map<unsigned int, weak_ptr<int> > &get_contents() { return contents; } +}; + +class SharedPtrRegistry_all : public ::testing::Test { +public: + + class Thread_wait : public Thread { + public: + SharedPtrRegistryTest ®istry; + unsigned int key; + int value; + shared_ptr<int> ptr; + enum in_method_t { LOOKUP, LOOKUP_OR_CREATE } in_method; + + Thread_wait(SharedPtrRegistryTest& _registry, unsigned int _key, int _value, in_method_t _in_method) : + registry(_registry), + key(_key), + value(_value), + in_method(_in_method) + { + } + + virtual void *entry() { + switch(in_method) { + case LOOKUP_OR_CREATE: + if (value) + ptr = registry.lookup_or_create<int>(key, value); + else + ptr = registry.lookup_or_create(key); + break; + case LOOKUP: + ptr = shared_ptr<int>(new int); + *ptr = value; + ptr = registry.lookup(key); + break; + } + return NULL; + } + }; + + static const useconds_t DELAY_MAX = 20 * 1000 * 1000; + static useconds_t delay; + + bool wait_for(SharedPtrRegistryTest ®istry, int waiting) { + do { + // + // the delay variable is supposed to be initialized to zero. It would be fine + // to usleep(0) but we take this opportunity to test the loop. It will try + // again and therefore show that the logic ( increasing the delay ) actually + // works. + // + if (delay > 0) + usleep(delay); + { + Mutex::Locker l(registry.get_lock()); + if (registry.waiting == waiting) + break; + } + if (delay > 0) + cout << "delay " << delay << "us, is not long enough, try again\n"; + } while (( delay = delay * 2 + 1) < DELAY_MAX); + return delay < DELAY_MAX; + } +}; + +useconds_t SharedPtrRegistry_all::delay = 0; + +TEST_F(SharedPtrRegistry_all, lookup_or_create) { + SharedPtrRegistryTest registry; + unsigned int key = 1; + int value = 2; + shared_ptr<int> ptr = registry.lookup_or_create(key); + *ptr = value; + ASSERT_EQ(value, *registry.lookup_or_create(key)); +} + +TEST_F(SharedPtrRegistry_all, wait_lookup_or_create) { + SharedPtrRegistryTest registry; + + // + // simulate the following: The last reference to a shared_ptr goes + // out of scope and the shared_ptr object is about to be removed and + // marked as such. The weak_ptr stored in the registry will show + // that it has expired(). However, the SharedPtrRegistry::OnRemoval + // object has not yet been called and did not get a chance to + // acquire the lock. The lookup_or_create and lookup methods must + // detect that situation and wait until the weak_ptr is removed from + // the registry. + // + { + unsigned int key = 1; + { + shared_ptr<int> ptr(new int); + registry.get_contents()[key] = ptr; + } + EXPECT_FALSE(registry.get_contents()[key].lock()); + + Thread_wait t(registry, key, 0, Thread_wait::LOOKUP_OR_CREATE); + t.create(); + ASSERT_TRUE(wait_for(registry, 1)); + EXPECT_FALSE(t.ptr); + // waiting on a key does not block lookups on other keys + EXPECT_TRUE(registry.lookup_or_create(key + 12345)); + registry.remove(key); + ASSERT_TRUE(wait_for(registry, 0)); + EXPECT_TRUE(t.ptr); + t.join(); + } + { + unsigned int key = 2; + int value = 3; + { + shared_ptr<int> ptr(new int); + registry.get_contents()[key] = ptr; + } + EXPECT_FALSE(registry.get_contents()[key].lock()); + + Thread_wait t(registry, key, value, Thread_wait::LOOKUP_OR_CREATE); + t.create(); + ASSERT_TRUE(wait_for(registry, 1)); + EXPECT_FALSE(t.ptr); + // waiting on a key does not block lookups on other keys + { + int other_value = value + 1; + unsigned int other_key = key + 1; + shared_ptr<int> ptr = registry.lookup_or_create<int>(other_key, other_value); + EXPECT_TRUE(ptr); + EXPECT_EQ(other_value, *ptr); + } + registry.remove(key); + ASSERT_TRUE(wait_for(registry, 0)); + EXPECT_TRUE(t.ptr); + EXPECT_EQ(value, *t.ptr); + t.join(); + } +} + +TEST_F(SharedPtrRegistry_all, lookup) { + SharedPtrRegistryTest registry; + unsigned int key = 1; + int value = 2; + { + shared_ptr<int> ptr = registry.lookup_or_create(key); + *ptr = value; + ASSERT_EQ(value, *registry.lookup(key)); + } + ASSERT_FALSE(registry.lookup(key)); +} + +TEST_F(SharedPtrRegistry_all, wait_lookup) { + SharedPtrRegistryTest registry; + + unsigned int key = 1; + int value = 2; + { + shared_ptr<int> ptr(new int); + registry.get_contents()[key] = ptr; + } + EXPECT_FALSE(registry.get_contents()[key].lock()); + + Thread_wait t(registry, key, value, Thread_wait::LOOKUP); + t.create(); + ASSERT_TRUE(wait_for(registry, 1)); + EXPECT_EQ(value, *t.ptr); + // waiting on a key does not block lookups on other keys + EXPECT_FALSE(registry.lookup(key + 12345)); + registry.remove(key); + ASSERT_TRUE(wait_for(registry, 0)); + EXPECT_FALSE(t.ptr); + t.join(); +} + +TEST_F(SharedPtrRegistry_all, get_next) { + + { + SharedPtrRegistry<unsigned int,int> registry; + const unsigned int key = 0; + pair<unsigned int, int> i; + EXPECT_FALSE(registry.get_next(key, &i)); + } + { + SharedPtrRegistryTest registry; + + const unsigned int key2 = 333; + shared_ptr<int> ptr2 = registry.lookup_or_create(key2); + const int value2 = *ptr2 = 400; + + // entries with expired pointers are silentely ignored + const unsigned int key_gone = 222; + registry.get_contents()[key_gone] = shared_ptr<int>(); + + const unsigned int key1 = 111; + shared_ptr<int> ptr1 = registry.lookup_or_create(key1); + const int value1 = *ptr1 = 800; + + pair<unsigned int, int> i; + EXPECT_TRUE(registry.get_next(i.first, &i)); + EXPECT_EQ(key1, i.first); + EXPECT_EQ(value1, i.second); + + EXPECT_TRUE(registry.get_next(i.first, &i)); + EXPECT_EQ(key2, i.first); + EXPECT_EQ(value2, i.second); + + EXPECT_FALSE(registry.get_next(i.first, &i)); + } +} + +class SharedPtrRegistry_destructor : public ::testing::Test { +public: + + typedef enum { UNDEFINED, YES, NO } DieEnum; + static DieEnum died; + + struct TellDie { + TellDie() { died = NO; } + ~TellDie() { died = YES; } + + int value; + }; + + virtual void SetUp() { + died = UNDEFINED; + } +}; + +SharedPtrRegistry_destructor::DieEnum SharedPtrRegistry_destructor::died = SharedPtrRegistry_destructor::UNDEFINED; + +TEST_F(SharedPtrRegistry_destructor, destructor) { + SharedPtrRegistry<int,TellDie> registry; + EXPECT_EQ(UNDEFINED, died); + int key = 101; + { + shared_ptr<TellDie> a = registry.lookup_or_create(key); + EXPECT_EQ(NO, died); + EXPECT_TRUE(a); + } + EXPECT_EQ(YES, died); + EXPECT_FALSE(registry.lookup(key)); +} + +int main(int argc, char **argv) { + vector<const char*> args; + argv_to_vec(argc, (const char **)argv, args); + + global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); + common_init_finish(g_ceph_context); + + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} + +// Local Variables: +// compile-command: "cd ../.. ; make unittest_sharedptr_registry && ./unittest_sharedptr_registry # --gtest_filter=*.* --log-to-stderr=true" +// End: diff --git a/src/test/filestore/store_test.cc b/src/test/filestore/store_test.cc index 87482ef702d..80c775052ec 100644 --- a/src/test/filestore/store_test.cc +++ b/src/test/filestore/store_test.cc @@ -829,6 +829,75 @@ TEST_F(StoreTest, ColSplitTest3) { } #endif +/** + * This test tests adding two different groups + * of objects, each with 1 common prefix and 1 + * different prefix. We then remove half + * in order to verify that the merging correctly + * stops at the common prefix subdir. See bug + * #5273 */ +TEST_F(StoreTest, TwoHash) { + coll_t cid("asdf"); + int r; + { + ObjectStore::Transaction t; + t.create_collection(cid); + r = store->apply_transaction(t); + ASSERT_EQ(r, 0); + } + std::cout << "Making objects" << std::endl; + for (int i = 0; i < 360; ++i) { + ObjectStore::Transaction t; + hobject_t o; + if (i < 8) { + o.hash = (i << 16) | 0xA1; + t.touch(cid, o); + } + o.hash = (i << 16) | 0xB1; + t.touch(cid, o); + r = store->apply_transaction(t); + ASSERT_EQ(r, 0); + } + std::cout << "Removing half" << std::endl; + for (int i = 1; i < 8; ++i) { + ObjectStore::Transaction t; + hobject_t o; + o.hash = (i << 16) | 0xA1; + t.remove(cid, o); + r = store->apply_transaction(t); + ASSERT_EQ(r, 0); + } + std::cout << "Checking" << std::endl; + for (int i = 1; i < 8; ++i) { + ObjectStore::Transaction t; + hobject_t o; + o.hash = (i << 16) | 0xA1; + bool exists = store->exists(cid, o); + ASSERT_EQ(exists, false); + } + { + hobject_t o; + o.hash = 0xA1; + bool exists = store->exists(cid, o); + ASSERT_EQ(exists, true); + } + std::cout << "Cleanup" << std::endl; + for (int i = 0; i < 360; ++i) { + ObjectStore::Transaction t; + hobject_t o; + o.hash = (i << 16) | 0xA1; + t.remove(cid, o); + o.hash = (i << 16) | 0xB1; + t.remove(cid, o); + r = store->apply_transaction(t); + ASSERT_EQ(r, 0); + } + ObjectStore::Transaction t; + t.remove_collection(cid); + r = store->apply_transaction(t); + ASSERT_EQ(r, 0); +} + // // support tests for qa/workunits/filestore/filestore.sh // @@ -892,7 +961,7 @@ int main(int argc, char **argv) { global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); common_init_finish(g_ceph_context); g_ceph_context->_conf->set_val("osd_journal_size", "400"); - g_ceph_context->_conf->set_val("filestore_index_retry_probability", "1"); + g_ceph_context->_conf->set_val("filestore_index_retry_probability", "0.5"); g_ceph_context->_conf->set_val("filestore_op_thread_timeout", "1000"); g_ceph_context->_conf->set_val("filestore_op_thread_suicide_timeout", "10000"); g_ceph_context->_conf->apply_changes(NULL); diff --git a/src/test/osd/TestRados.cc b/src/test/osd/TestRados.cc index 43530f00828..6ac661c0629 100644 --- a/src/test/osd/TestRados.cc +++ b/src/test/osd/TestRados.cc @@ -2,6 +2,7 @@ #include "common/Mutex.h" #include "common/Cond.h" #include "common/errno.h" +#include "common/version.h" #include <iostream> #include <sstream> @@ -14,6 +15,7 @@ #include "test/osd/RadosModel.h" + using namespace std; class WeightedTestGenerator : public TestOpGenerator @@ -250,6 +252,7 @@ int main(int argc, char **argv) if (max_stride_size < 0) max_stride_size = size / 5; + cout << pretty_version_to_str() << std::endl; cout << "Configuration:" << std::endl << "\tNumber of operations: " << ops << std::endl << "\tNumber of objects: " << objects << std::endl diff --git a/src/test/perf_counters.cc b/src/test/perf_counters.cc index d8f04ca7d10..d0b05f9f049 100644 --- a/src/test/perf_counters.cc +++ b/src/test/perf_counters.cc @@ -60,7 +60,7 @@ int main(int argc, char **argv) { TEST(PerfCounters, SimpleTest) { AdminSocketClient client(get_rand_socket_path()); std::string message; - ASSERT_EQ("", client.do_request("perfcounters_dump", &message)); + ASSERT_EQ("", client.do_request("{ \"prefix\": \"perfcounters_dump\" }", &message)); ASSERT_EQ("{}", message); } @@ -100,20 +100,20 @@ TEST(PerfCounters, SinglePerfCounters) { coll->add(fake_pf); AdminSocketClient client(get_rand_socket_path()); std::string msg; - ASSERT_EQ("", client.do_request("perfcounters_dump", &msg)); - ASSERT_EQ(sd("{'test_perfcounter_1':{'element1':0," - "'element2':0.000000000,'element3':{'avgcount':0,'sum':0.000000000}}}"), msg); + ASSERT_EQ("", client.do_request("{ \"prefix\": \"perfcounters_dump\", \"format\": \"json\" }", &msg)); + ASSERT_EQ(sd("{\"test_perfcounter_1\":{\"element1\":0," + "\"element2\":0.000000000,\"element3\":{\"avgcount\":0,\"sum\":0.000000000}}}"), msg); fake_pf->inc(TEST_PERFCOUNTERS1_ELEMENT_1); fake_pf->tset(TEST_PERFCOUNTERS1_ELEMENT_2, utime_t(0, 500000000)); fake_pf->tinc(TEST_PERFCOUNTERS1_ELEMENT_3, utime_t(100, 0)); - ASSERT_EQ("", client.do_request("perfcounters_dump", &msg)); - ASSERT_EQ(sd("{'test_perfcounter_1':{'element1':1," - "'element2':0.500000000,'element3':{'avgcount':1,'sum':100.000000000}}}"), msg); + ASSERT_EQ("", client.do_request("{ \"prefix\": \"perfcounters_dump\", \"format\": \"json\" }", &msg)); + ASSERT_EQ(sd("{\"test_perfcounter_1\":{\"element1\":1," + "\"element2\":0.500000000,\"element3\":{\"avgcount\":1,\"sum\":100.000000000}}}"), msg); fake_pf->tinc(TEST_PERFCOUNTERS1_ELEMENT_3, utime_t()); fake_pf->tinc(TEST_PERFCOUNTERS1_ELEMENT_3, utime_t(25,0)); - ASSERT_EQ("", client.do_request("perfcounters_dump", &msg)); - ASSERT_EQ(sd("{'test_perfcounter_1':{'element1':1,'element2':0.500000000," - "'element3':{'avgcount':3,'sum':125.000000000}}}"), msg); + ASSERT_EQ("", client.do_request("{ \"prefix\": \"perfcounters_dump\", \"format\": \"json\" }", &msg)); + ASSERT_EQ(sd("{\"test_perfcounter_1\":{\"element1\":1,\"element2\":0.500000000," + "\"element3\":{\"avgcount\":3,\"sum\":125.000000000}}}"), msg); } enum { @@ -142,24 +142,24 @@ TEST(PerfCounters, MultiplePerfCounters) { AdminSocketClient client(get_rand_socket_path()); std::string msg; - ASSERT_EQ("", client.do_request("perfcounters_dump", &msg)); - ASSERT_EQ(sd("{'test_perfcounter_1':{'element1':0,'element2':0.000000000,'element3':" - "{'avgcount':0,'sum':0.000000000}},'test_perfcounter_2':{'foo':0,'bar':0.000000000}}"), msg); + ASSERT_EQ("", client.do_request("{ \"prefix\": \"perfcounters_dump\", \"format\": \"json\" }", &msg)); + ASSERT_EQ(sd("{\"test_perfcounter_1\":{\"element1\":0,\"element2\":0.000000000,\"element3\":" + "{\"avgcount\":0,\"sum\":0.000000000}},\"test_perfcounter_2\":{\"foo\":0,\"bar\":0.000000000}}"), msg); fake_pf1->inc(TEST_PERFCOUNTERS1_ELEMENT_1); fake_pf1->inc(TEST_PERFCOUNTERS1_ELEMENT_1, 5); - ASSERT_EQ("", client.do_request("perfcounters_dump", &msg)); - ASSERT_EQ(sd("{'test_perfcounter_1':{'element1':6,'element2':0.000000000,'element3':" - "{'avgcount':0,'sum':0.000000000}},'test_perfcounter_2':{'foo':0,'bar':0.000000000}}"), msg); + ASSERT_EQ("", client.do_request("{ \"prefix\": \"perfcounters_dump\", \"format\": \"json\" }", &msg)); + ASSERT_EQ(sd("{\"test_perfcounter_1\":{\"element1\":6,\"element2\":0.000000000,\"element3\":" + "{\"avgcount\":0,\"sum\":0.000000000}},\"test_perfcounter_2\":{\"foo\":0,\"bar\":0.000000000}}"), msg); coll->remove(fake_pf2); - ASSERT_EQ("", client.do_request("perfcounters_dump", &msg)); - ASSERT_EQ(sd("{'test_perfcounter_1':{'element1':6,'element2':0.000000000," - "'element3':{'avgcount':0,'sum':0.000000000}}}"), msg); - ASSERT_EQ("", client.do_request("perfcounters_schema", &msg)); - ASSERT_EQ(sd("{'test_perfcounter_1':{'element1':{'type':2}," - "'element2':{'type':1},'element3':{'type':5}}}"), msg); + ASSERT_EQ("", client.do_request("{ \"prefix\": \"perfcounters_dump\", \"format\": \"json\" }", &msg)); + ASSERT_EQ(sd("{\"test_perfcounter_1\":{\"element1\":6,\"element2\":0.000000000," + "\"element3\":{\"avgcount\":0,\"sum\":0.000000000}}}"), msg); + ASSERT_EQ("", client.do_request("{ \"prefix\": \"perf schema\", \"format\": \"json\" }", &msg)); + ASSERT_EQ(sd("{\"test_perfcounter_1\":{\"element1\":{\"type\":2}," + "\"element2\":{\"type\":1},\"element3\":{\"type\":5}}}"), msg); coll->clear(); - ASSERT_EQ("", client.do_request("perfcounters_dump", &msg)); + ASSERT_EQ("", client.do_request("{ \"prefix\": \"perfcounters_dump\", \"format\": \"json\" }", &msg)); ASSERT_EQ("{}", msg); } diff --git a/src/test/signals.cc b/src/test/signals.cc index 43754988559..2c7f4d32a0a 100644 --- a/src/test/signals.cc +++ b/src/test/signals.cc @@ -110,3 +110,33 @@ TEST(SignalHandler, Multiple) unregister_async_signal_handler(SIGUSR2, testhandler); shutdown_async_signal_handler(); } + +/* +TEST(SignalHandler, MultipleBigFd) +{ + int ret; + + for (int i = 0; i < 1500; i++) + ::open(".", O_RDONLY); + + reset(); + init_async_signal_handler(); + register_async_signal_handler(SIGUSR1, testhandler); + register_async_signal_handler(SIGUSR2, testhandler); + ASSERT_TRUE(usr1 == false); + ASSERT_TRUE(usr2 == false); + + ret = kill(getpid(), SIGUSR1); + ASSERT_EQ(ret, 0); + ret = kill(getpid(), SIGUSR2); + ASSERT_EQ(ret, 0); + + sleep(1); + ASSERT_TRUE(usr1 == true); + ASSERT_TRUE(usr2 == true); + + unregister_async_signal_handler(SIGUSR1, testhandler); + unregister_async_signal_handler(SIGUSR2, testhandler); + shutdown_async_signal_handler(); +} +*/ diff --git a/src/test/system/rados_list_parallel.cc b/src/test/system/rados_list_parallel.cc index a1c6e270265..d530c83441c 100644 --- a/src/test/system/rados_list_parallel.cc +++ b/src/test/system/rados_list_parallel.cc @@ -62,6 +62,7 @@ public: RETURN1_IF_NONZERO(rados_create(&cl, NULL)); rados_conf_parse_argv(cl, m_argc, m_argv); RETURN1_IF_NONZERO(rados_conf_read_file(cl, NULL)); + rados_conf_parse_env(cl, NULL); std::string log_name = SysTestSettings::inst().get_log_name(get_id_str()); if (!log_name.empty()) rados_conf_set(cl, "log_file", log_name.c_str()); @@ -142,6 +143,7 @@ public: RETURN1_IF_NONZERO(rados_create(&cl, NULL)); rados_conf_parse_argv(cl, m_argc, m_argv); RETURN1_IF_NONZERO(rados_conf_read_file(cl, NULL)); + rados_conf_parse_env(cl, NULL); std::string log_name = SysTestSettings::inst().get_log_name(get_id_str()); if (!log_name.empty()) rados_conf_set(cl, "log_file", log_name.c_str()); diff --git a/src/test/system/rados_open_pools_parallel.cc b/src/test/system/rados_open_pools_parallel.cc index 445f2ebc485..82c712077f2 100644 --- a/src/test/system/rados_open_pools_parallel.cc +++ b/src/test/system/rados_open_pools_parallel.cc @@ -68,6 +68,7 @@ public: if (!log_name.empty()) rados_conf_set(cl, "log_file", log_name.c_str()); RETURN1_IF_NONZERO(rados_conf_read_file(cl, NULL)); + rados_conf_parse_env(cl, NULL); RETURN1_IF_NONZERO(rados_connect(cl)); if (m_pool_setup_sem) m_pool_setup_sem->wait(); diff --git a/src/test/system/st_rados_create_pool.cc b/src/test/system/st_rados_create_pool.cc index dcae15375af..29b8d14b7e7 100644 --- a/src/test/system/st_rados_create_pool.cc +++ b/src/test/system/st_rados_create_pool.cc @@ -72,6 +72,7 @@ run() std::string log_name = SysTestSettings::inst().get_log_name(get_id_str()); if (!log_name.empty()) rados_conf_set(cl, "log_file", log_name.c_str()); + rados_conf_parse_env(cl, NULL); if (m_setup_sem) { m_setup_sem->wait(); diff --git a/src/test/system/st_rados_delete_objs.cc b/src/test/system/st_rados_delete_objs.cc index 38dc47a3557..b43ffdda6ae 100644 --- a/src/test/system/st_rados_delete_objs.cc +++ b/src/test/system/st_rados_delete_objs.cc @@ -45,6 +45,7 @@ int StRadosDeleteObjs::run() RETURN1_IF_NONZERO(rados_create(&cl, NULL)); rados_conf_parse_argv(cl, m_argc, m_argv); RETURN1_IF_NONZERO(rados_conf_read_file(cl, NULL)); + rados_conf_parse_env(cl, NULL); RETURN1_IF_NONZERO(rados_connect(cl)); m_setup_sem->wait(); m_setup_sem->post(); diff --git a/src/test/system/st_rados_delete_pool.cc b/src/test/system/st_rados_delete_pool.cc index d954bf46c23..de553e98e13 100644 --- a/src/test/system/st_rados_delete_pool.cc +++ b/src/test/system/st_rados_delete_pool.cc @@ -41,6 +41,7 @@ int StRadosDeletePool::run() RETURN1_IF_NONZERO(rados_create(&cl, NULL)); rados_conf_parse_argv(cl, m_argc, m_argv); RETURN1_IF_NONZERO(rados_conf_read_file(cl, NULL)); + rados_conf_parse_env(cl, NULL); RETURN1_IF_NONZERO(rados_connect(cl)); m_pool_setup_sem->wait(); m_pool_setup_sem->post(); diff --git a/src/test/system/st_rados_list_objects.cc b/src/test/system/st_rados_list_objects.cc index bb153affeb8..be6ead64987 100644 --- a/src/test/system/st_rados_list_objects.cc +++ b/src/test/system/st_rados_list_objects.cc @@ -56,6 +56,7 @@ run() RETURN1_IF_NONZERO(rados_create(&cl, NULL)); rados_conf_parse_argv(cl, m_argc, m_argv); RETURN1_IF_NONZERO(rados_conf_read_file(cl, NULL)); + rados_conf_parse_env(cl, NULL); RETURN1_IF_NONZERO(rados_connect(cl)); m_pool_setup_sem->wait(); m_pool_setup_sem->post(); diff --git a/src/test/system/st_rados_notify.cc b/src/test/system/st_rados_notify.cc index 10e3e4bfbc3..e74711cb641 100644 --- a/src/test/system/st_rados_notify.cc +++ b/src/test/system/st_rados_notify.cc @@ -44,6 +44,8 @@ int StRadosNotify::run() RETURN1_IF_NONZERO(rados_create(&cl, NULL)); rados_conf_parse_argv(cl, m_argc, m_argv); RETURN1_IF_NONZERO(rados_conf_read_file(cl, NULL)); + rados_conf_parse_env(cl, NULL); + if (m_setup_sem) { m_setup_sem->wait(); m_setup_sem->post(); diff --git a/src/test/system/st_rados_watch.cc b/src/test/system/st_rados_watch.cc index 991dde8ea77..696b0867e88 100644 --- a/src/test/system/st_rados_watch.cc +++ b/src/test/system/st_rados_watch.cc @@ -52,6 +52,8 @@ run() RETURN1_IF_NONZERO(rados_create(&cl, NULL)); rados_conf_parse_argv(cl, m_argc, m_argv); RETURN1_IF_NONZERO(rados_conf_read_file(cl, NULL)); + rados_conf_parse_env(cl, NULL); + if (m_setup_sem) { m_setup_sem->wait(); m_setup_sem->post(); diff --git a/src/test/test_rgw_admin_log.cc b/src/test/test_rgw_admin_log.cc index 67ba0b12d96..f49a107d2f6 100644 --- a/src/test/test_rgw_admin_log.cc +++ b/src/test/test_rgw_admin_log.cc @@ -163,13 +163,13 @@ void test_helper::set_response(char *r){ } size_t write_header(void *ptr, size_t size, size_t nmemb, void *ud){ - test_helper *h = (test_helper *)ud; + test_helper *h = static_cast<test_helper *>(ud); h->set_response((char *)ptr); return size*nmemb; } size_t write_data(void *ptr, size_t size, size_t nmemb, void *ud){ - test_helper *h = (test_helper *)ud; + test_helper *h = static_cast<test_helper *>(ud); h->set_response_data((char *)ptr, size*nmemb); return size*nmemb; } @@ -405,7 +405,6 @@ int user_create(string& uid, string& display_name, bool set_creds = true) { int user_info(string& uid, string& display_name, RGWUserInfo& uinfo) { stringstream ss; - string creds; ss << "-c " << g_test->get_ceph_conf_path() << " user info --uid=" << uid << " --display-name=" << display_name; @@ -426,7 +425,6 @@ int user_info(string& uid, string& display_name, RGWUserInfo& uinfo) { int user_rm(string& uid, string& display_name) { stringstream ss; - string creds; ss << "-c " << g_test->get_ceph_conf_path() << " metadata rm --metadata-key=user:" << uid; diff --git a/src/test/test_rgw_admin_meta.cc b/src/test/test_rgw_admin_meta.cc index 2882891e411..fb9cf863f06 100644 --- a/src/test/test_rgw_admin_meta.cc +++ b/src/test/test_rgw_admin_meta.cc @@ -157,13 +157,13 @@ void test_helper::set_response(char *r){ } size_t write_header(void *ptr, size_t size, size_t nmemb, void *ud){ - test_helper *h = (test_helper *)ud; + test_helper *h = static_cast<test_helper *>(ud); h->set_response((char *)ptr); return size*nmemb; } size_t write_data(void *ptr, size_t size, size_t nmemb, void *ud){ - test_helper *h = (test_helper *)ud; + test_helper *h = static_cast<test_helper *>(ud); h->set_response_data((char *)ptr, size*nmemb); return size*nmemb; } @@ -399,7 +399,6 @@ int user_create(string& uid, string& display_name, bool set_creds = true) { int user_info(string& uid, string& display_name, RGWUserInfo& uinfo) { stringstream ss; - string creds; ss << "-c " << g_test->get_ceph_conf_path() << " user info --uid=" << uid << " --display-name=" << display_name; @@ -420,7 +419,6 @@ int user_info(string& uid, string& display_name, RGWUserInfo& uinfo) { int user_rm(string& uid, string& display_name) { stringstream ss; - string creds; ss << "-c " << g_test->get_ceph_conf_path() << " user rm --uid=" << uid << " --display-name=" << display_name; diff --git a/src/test/test_rgw_admin_opstate.cc b/src/test/test_rgw_admin_opstate.cc index 1cd39890d48..3f173d12932 100644 --- a/src/test/test_rgw_admin_opstate.cc +++ b/src/test/test_rgw_admin_opstate.cc @@ -160,13 +160,13 @@ void test_helper::set_response(char *r){ } size_t write_header(void *ptr, size_t size, size_t nmemb, void *ud){ - test_helper *h = (test_helper *)ud; + test_helper *h = static_cast<test_helper *>(ud); h->set_response((char *)ptr); return size*nmemb; } size_t write_data(void *ptr, size_t size, size_t nmemb, void *ud){ - test_helper *h = (test_helper *)ud; + test_helper *h = static_cast<test_helper *>(ud); h->set_response_data((char *)ptr, size*nmemb); return size*nmemb; } @@ -403,7 +403,6 @@ int user_create(string& uid, string& display_name, bool set_creds = true) { int user_info(string& uid, string& display_name, RGWUserInfo& uinfo) { stringstream ss; - string creds; ss << "-c " << g_test->get_ceph_conf_path() << " user info --uid=" << uid << " --display-name=" << display_name; @@ -424,7 +423,6 @@ int user_info(string& uid, string& display_name, RGWUserInfo& uinfo) { int user_rm(string& uid, string& display_name) { stringstream ss; - string creds; ss << "-c " << g_test->get_ceph_conf_path() << " metadata rm --metadata-key=user:" << uid; diff --git a/src/upstart/ceph-create-keys.conf b/src/upstart/ceph-create-keys.conf index de215d98ff3..7c79e692a86 100644 --- a/src/upstart/ceph-create-keys.conf +++ b/src/upstart/ceph-create-keys.conf @@ -1,6 +1,7 @@ description "Create Ceph client.admin key when possible" start on started ceph-mon +stop on stopping ceph-mon task diff --git a/src/upstart/ceph-osd.conf b/src/upstart/ceph-osd.conf index e26bbc790b9..c0bf4d68c1c 100644 --- a/src/upstart/ceph-osd.conf +++ b/src/upstart/ceph-osd.conf @@ -6,7 +6,7 @@ stop on runlevel [!2345] or stopping ceph-osd-all respawn respawn limit 5 30 -limit nofile 16384 16384 +limit nofile 32768 32768 pre-start script set -e |