121 files changed, 2637 insertions, 1252 deletions
diff --git a/COPYING b/COPYING
index b374bdc1801..fe6ffaac0f8 100644
--- a/COPYING
+++ b/COPYING
@@ -15,11 +15,6 @@ Copyright:
     Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
 License: GPL2
 
-Files: src/common/fiemap.cc
-Copyright:
-    Copyright (C) 2010 Canonical
-License: GPL2
-
 Files: src/mount/canonicalize.c
 Copyright: Copyright (C) 1993 Rick Sladkey <jrs@world.std.com>
 License: LGPL2 or later
diff --git a/PendingReleaseNotes b/PendingReleaseNotes
index ea06b760ba3..4b6cb800290 100644
--- a/PendingReleaseNotes
+++ b/PendingReleaseNotes
@@ -1,3 +1,15 @@
 v0.67
 ~~~~~
 
+* ceph-osd now requires a max fd limit of at least
+  filestore_wbthrottle_(xfs|btrfs)_inodes_hard_limit (5000 by default)
+  in order to accomodate the new write back throttle system.  upstart
+  now sets the fd limit to 32k.  sysvinit will set it to 32k by default
+  (still overrideable via max_open_files).
+
+* The 'ceph pg <pgid> ...' commands (like 'ceph pg <pgid> query') are
+  deprecated in favor of 'ceph tell <pgid> ...'.  This makes the
+  distinction between 'ceph pg <command> <pgid>' and 'ceph pg <pgid>
+  <command>' less awkward by making it clearer that the 'tell'
+  commands are talking to the OSD serving the placement group, not the
+  monitor.
diff --git a/ceph.spec.in b/ceph.spec.in
index 4365eb55eb0..696d9ad3332 100644
--- a/ceph.spec.in
+++ b/ceph.spec.in
@@ -146,7 +146,9 @@ managers such as Pacemaker.
 Summary:	RADOS distributed object store client library
 Group:		System Environment/Libraries
 License:	LGPL-2.0
+%if 0%{?rhel_version} || 0%{?centos_version} || 0%{?fedora}
 Obsoletes:	ceph-libs
+%endif
 %description -n librados2
 RADOS is a reliable, autonomic distributed object storage cluster
 developed as part of the Ceph distributed storage system. This is a
@@ -157,7 +159,9 @@ store using a simple file-like interface.
 Summary:	RADOS block device client library
 Group:		System Environment/Libraries
 License:	LGPL-2.0
+%if 0%{?rhel_version} || 0%{?centos_version} || 0%{?fedora}
 Obsoletes:	ceph-libs
+%endif
 %description -n librbd1
 RBD is a block device striped across multiple distributed objects in
 RADOS, a reliable, autonomic distributed object storage cluster
@@ -168,7 +172,9 @@ shared library allowing applications to manage these block devices.
 Summary:	Ceph distributed file system client library
 Group:		System Environment/Libraries
 License:	LGPL-2.0
+%if 0%{?rhel_version} || 0%{?centos_version} || 0%{?fedora}
 Obsoletes:	ceph-libs
+%endif
 %description -n libcephfs1
 Ceph is a distributed network file system designed to provide excellent
 performance, reliability, and scalability. This is a shared library
@@ -182,6 +188,8 @@ License:	LGPL-2.0
 Requires:	librados2 = %{version}-%{release}
 Requires:	librbd1 = %{version}-%{release}
 Requires:	libcephfs1 = %{version}-%{release}
+Requires:	python-flask
+Requires:	python-requests
 %if 0%{defined suse_version}
 %py_requires
 %endif
diff --git a/configure.ac b/configure.ac
index 415da311712..812126664eb 100644
--- a/configure.ac
+++ b/configure.ac
@@ -8,7 +8,7 @@ AC_PREREQ(2.59)
 # VERSION define is not used by the code.  It gets a version string
 # from 'git describe'; see src/ceph_ver.[ch]
 
-AC_INIT([ceph], [0.67-rc1], [ceph-devel@vger.kernel.org])
+AC_INIT([ceph], [0.67], [ceph-devel@vger.kernel.org])
 
 # Create release string.  Used with VERSION for RPMs.
 RPM_RELEASE=0
diff --git a/debian/changelog b/debian/changelog
index cfcf0491dd8..7b814f0da90 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,9 @@
+ceph (0.67-rc2-1) precise; urgency=low
+
+  * New upstream release 
+
+ -- Gary Lowell <gary.lowell@inktank.com>  Wed, 24 Jul 2013 16:18:33 -0700
+
 ceph (0.67-rc1-1) precise; urgency=low
 
   * New upstream release 
diff --git a/debian/control b/debian/control
index 241fabb5e4f..195cb37fe62 100644
--- a/debian/control
+++ b/debian/control
@@ -100,7 +100,7 @@ Description: debugging symbols for ceph-mds
 Package: ceph-fuse
 Architecture: linux-any
 Depends: ${misc:Depends}, ${shlibs:Depends}
-Recommends: fuse | fuse-utils
+Recommends: fuse
 Description: FUSE-based client for the Ceph distributed file system
  Ceph is a distributed network file system designed to provide
  excellent performance, reliability, and scalability.  This is a
@@ -129,7 +129,7 @@ Description: debugging symbols for ceph-fuse
 Package: rbd-fuse
 Architecture: linux-any
 Depends: ${misc:Depends}, ${shlibs:Depends}
-Recommends: fuse | fuse-utils
+Recommends: fuse
 Description: FUSE-based rbd client for the Ceph distributed file system
  Ceph is a distributed network file system designed to provide
  excellent performance, reliability, and scalability.  This is a
@@ -389,7 +389,7 @@ Description: Ceph test and benchmarking tools.
 Package: python-ceph
 Architecture: linux-any
 Section: python
-Depends: librados2, librbd1, ${misc:Depends}, ${python:Depends}
+Depends: librados2, librbd1, python-flask, ${misc:Depends}, ${python:Depends}, python-requests
 X-Python-Version: >= 2.6
 Description: Python libraries for the Ceph distributed filesystem
  Ceph is a distributed storage and network file system designed to provide
diff --git a/debian/copyright b/debian/copyright
index 4295a1564f9..e94a11b9962 100644
--- a/debian/copyright
+++ b/debian/copyright
@@ -16,11 +16,6 @@ Copyright:
     Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
 License: GPL2
 
-Files: src/common/fiemap.cc
-Copyright:
-    Copyright (C) 2010 Canonical
-License: GPL2
-
 Files: src/mount/canonicalize.c
 Copyright: Copyright (C) 1993 Rick Sladkey <jrs@world.std.com>
 License: LGPL2 or later
diff --git a/doc/changelog/v0.61.6.txt b/doc/changelog/v0.61.6.txt
new file mode 100644
index 00000000000..3a1e5bd4be9
--- /dev/null
+++ b/doc/changelog/v0.61.6.txt
@@ -0,0 +1,75 @@
+commit 59ddece17e36fef69ecf40e239aeffad33c9db35
+Author: Gary Lowell <gary.lowell@inktank.com>
+Date:   Tue Jul 23 13:52:19 2013 -0700
+
+    v0.61.6
+
+commit 38c3271d3fc415919f0856398bd94eb87a0776b5
+Author: Sage Weil <sage@inktank.com>
+Date:   Tue Jul 23 13:32:12 2013 -0700
+
+    mon/OSDMonitor: fix base case for 7fb3804fb workaround
+    
+    After cluster creation, we have no full map stored and first_committed ==
+    1.  In that case, there is no need for a full map, since we can get there
+    from OSDMap() and the incrementals.
+    
+    Backport: cuttlefish
+    Signed-off-by: Sage Weil <sage@inktank.com>
+    Reviewed-by: Joao Eduardo Luis <joao@inktank.com>
+    (cherry picked from commit e807770784175b05130bba938113fdbf874f152e)
+
+commit f94956cb1a56ff62e01b7ae218a93c4004470ae5
+Author: Joao Eduardo Luis <joao.luis@inktank.com>
+Date:   Tue Jul 23 17:25:13 2013 +0100
+
+    mon: OSDMonitor: work around a full version bug introduced in 7fb3804fb
+    
+    In 7fb3804fb860dcd0340dd3f7c39eec4315f8e4b6 we moved the full version
+    stashing logic to the encode_trim_extra() function.  However, we forgot
+    to update the osdmap's 'latest_full' key that should always point to
+    the latest osdmap full version.  This eventually degenerated in a missing
+    full version after a trim.  This patch works around this bug by looking
+    for the latest available full osdmap version in the store and updating
+    'latest_full' to its proper value.
+    
+    Related-to: #5704
+    Backport: cuttlefish
+    
+    Signed-off-by: Joao Eduardo Luis <joao.luis@inktank.com>
+    Reviewed-by: Sage Weil <sage@inktank.com>
+    (cherry picked from commit 97462a3213e5e15812c79afc0f54d697b6c498b1)
+
+commit 10e1de45dc8ace793ecf921f884f90c9daa99c48
+Author: Joao Eduardo Luis <joao.luis@inktank.com>
+Date:   Tue Jul 23 16:36:52 2013 +0100
+
+    mon: OSDMonitor: update the osdmap's latest_full with the new full version
+    
+    We used to do this on encode_full(), but since [1] we no longer rely on
+    PaxosService to manage the full maps for us.  And we forgot to write down
+    the latest_full version to the store, leaving it in a truly outdated state.
+    
+    [1] - 7fb3804fb860dcd0340dd3f7c39eec4315f8e4b6
+    
+    Fixes: #5704
+    Backport: cuttlefish
+    Signed-off-by: Joao Eduardo Luis <joao.luis@inktank.com>
+    Reviewed-by: Sage Weil <sage@inktank.com>
+    (cherry picked from commit a815547ed3e5ffdbbb96c8c0c1b8d6dd8c62bfba)
+
+commit a0cb40b45c4f2f921a63c2d7bb5a28572381d793
+Author: Sage Weil <sage@inktank.com>
+Date:   Thu Jul 18 14:35:19 2013 -0700
+
+    mon: decline to scrub when paxos is not active
+    
+    In f1ce8d7c955a2443111bf7d9e16b4c563d445712 we close a race between scrub
+    and paxos commit completion on the leader.  The fix is nontrivial to
+    backport and probably not worthwhile; just avoid scrubbing at that time
+    for now.
+    
+    Note that the actual fix for this is in commit
+    f1ce8d7c955a2443111bf7d9e16b4c563d445712.
+    
+    Signed-off-by: Sage Weil <sage@inktank.com>
diff --git a/doc/changelog/v0.61.7.txt b/doc/changelog/v0.61.7.txt
new file mode 100644
index 00000000000..5836c6a32d3
--- /dev/null
+++ b/doc/changelog/v0.61.7.txt
@@ -0,0 +1,220 @@
+commit 8f010aff684e820ecc837c25ac77c7a05d7191ff
+Author: Gary Lowell <gary.lowell@inktank.com>
+Date:   Wed Jul 24 20:44:12 2013 -0700
+
+    v0.61.7
+
+commit 24a56a9637afd8c64b71d264359c78a25d52be02
+Author: Sage Weil <sage@inktank.com>
+Date:   Wed Jul 24 14:46:24 2013 -0700
+
+    ceph-disk: use new get_dev_path helper for list
+    
+    Backport: cuttlefish
+    Signed-off-by: Sage Weil <sage@inktank.com>
+    Reviewed-by: Dan Mick <dan.mick@inktank.com>
+    Tested-by: Olivier Bonvalet <ob.ceph@daevel.fr>
+    (cherry picked from commit fd1fd664d6102a2a96b27e8ca9933b54ac626ecb)
+
+commit 1f8e4b15eeb132fd7f389318009b19f8f13adbf5
+Author: Sage Weil <sage@inktank.com>
+Date:   Thu Jul 11 12:59:56 2013 -0700
+
+    ceph-disk: use /sys/block to determine partition device names
+    
+    Not all devices are basename + number; some have intervening character(s),
+    like /dev/cciss/c0d1p2.
+    
+    Signed-off-by: Sage Weil <sage@inktank.com>
+    (cherry picked from commit 2ea8fac441141d64ee0d26c5dd2b441f9782d840)
+
+commit 0a08c8226cb3e461301beade9bab2e264d1b960e
+Author: Sage Weil <sage@inktank.com>
+Date:   Wed Jul 3 11:01:58 2013 -0700
+
+    ceph-disk: reimplement is_partition() using /sys/block
+    
+    Signed-off-by: Sage Weil <sage@inktank.com>
+    (cherry picked from commit 5b031e100b40f597752b4917cdbeebb366eb98d7)
+
+commit 056000346db09ea7274a22e57cf4b86a7ea4090e
+Author: Sage Weil <sage@inktank.com>
+Date:   Wed Jul 3 11:01:39 2013 -0700
+
+    ceph-disk: use get_dev_name() helper throughout
+    
+    This is more robust than the broken split trick.
+    
+    Signed-off-by: Sage Weil <sage@inktank.com>
+    (cherry picked from commit 3359aaedde838c98d1155611e157fd2da9e8b9f5)
+
+commit f3ee3e690c42769229a6cd9ae8dec43f2aa22ecd
+Author: Sage Weil <sage@inktank.com>
+Date:   Wed Jul 3 10:55:36 2013 -0700
+
+    ceph-disk: refactor list_[all_]partitions
+    
+    Make these methods work in terms of device *names*, not paths, and fix up
+    the only direct list_partitions() caller to do the same.
+    
+    Signed-off-by: Sage Weil <sage@inktank.com>
+    (cherry picked from commit 35d3f2d84808efda3d2ac868afe03e6959d51c03)
+
+commit be12811b4cb98ff1c2c691c67af7ad3586c436ff
+Author: Sage Weil <sage@inktank.com>
+Date:   Wed Jul 3 10:52:29 2013 -0700
+
+    ceph-disk: add get_dev_name, path helpers
+    
+    Signed-off-by: Sage Weil <sage@inktank.com>
+    (cherry picked from commit e0401591e352ea9653e3276d66aebeb41801eeb3)
+
+commit f46dbc462f623e9ab6c00394abb4d890e5d90890
+Author: Sage Weil <sage@inktank.com>
+Date:   Tue Jun 18 16:21:48 2013 -0700
+
+    ceph-disk: handle /dev/foo/bar devices throughout
+    
+    Assume the last component is the unique device name, even if it appears
+    under a subdir of /dev.
+    
+    Signed-off-by: Sage Weil <sage@inktank.com>
+    (cherry picked from commit cb97338b1186939deecb78e9d949c38c3ef59026)
+
+commit f799dac7bdf7cf0824a177131473cf59ef3c5205
+Author: Sage Weil <sage@inktank.com>
+Date:   Mon Jun 17 20:54:15 2013 -0700
+
+    ceph-disk: make is_held() smarter about full disks
+    
+    Handle the case where the device is a full disk.  Make the partition
+    check a bit more robust (don't make assumptions about naming aside from
+    the device being a prefix of the partition).
+    
+    Signed-off-by: Sage Weil <sage@inktank.com>
+    (cherry picked from commit e082f1247fb6ddfb36c4223cbfdf500d6b45c978)
+
+commit 27f31895664fa7f10c1617d486f2a6ece0f97091
+Author: Sage Weil <sage@inktank.com>
+Date:   Wed Jul 24 11:55:42 2013 -0700
+
+    mon/OSDMonitor: search for latest full osdmap if record version is missing
+    
+    In 97462a3213e5e15812c79afc0f54d697b6c498b1 we tried to search for a
+    recent full osdmap but were looking at the wrong key.  If full_0 was
+    present we could record that the latest full map was last_committed even
+    though it wasn't present.  This is fixed in 76cd7ac1c, but we need to
+    compensate for when get_version_latest_full() gives us a back version
+    number by repeating the search.
+    
+    Fixes: #5737
+    Signed-off-by: Sage Weil <sage@inktank.com>
+    Reviewed-by: Joao Eduardo Luis <joao.luis@inktank.com>
+    (cherry picked from commit c2131d4047156aa2964581c9dbd93846382a07e7)
+
+commit 5b0967f03efb1be210b52f24f095f023fe1bc539
+Author: Joao Eduardo Luis <joao.luis@inktank.com>
+Date:   Mon Jun 17 14:43:36 2013 +0100
+
+    test: test_store_tool: global init before using LevelDBStore
+    
+    Fixes a segfault
+    
+    Signed-off-by: Joao Eduardo Luis <joao.luis@inktank.com>
+    Reviewed-by: Sage Weil <sage@inktank.com>
+    (cherry picked from commit a7a7d3fc8a2ba4a30ef136a32f2903d157b3e19a)
+
+commit 115468c73f121653eec2efc030d5ba998d834e43
+Author: Joao Eduardo Luis <joao.luis@inktank.com>
+Date:   Wed Jul 24 12:00:28 2013 +0100
+
+    mon: OSDMonitor: fix a bug introduced on 97462a32
+    
+    Fixes: #5737
+    Backport: cuttlefish
+    
+    Signed-off-by: Joao Eduardo Luis <joao.luis@inktank.com>
+    Reviewed-by: Sage Weil <sage@inktank.com>
+    (cherry picked from commit 76cd7ac1c2094b34ad36bea89b2246fa90eb2f6d)
+
+commit 938a639e2cb6abd22c2c588e619c1aae32c6521f
+Author: Sage Weil <sage@inktank.com>
+Date:   Sun Jul 21 08:48:18 2013 -0700
+
+    mon/Paxos: fix pn for uncommitted value during collect/last phase
+    
+    During the collect/last exchange, peers share any uncommitted values
+    with the leader.  They are supposed to also share the pn under which
+    that value was accepted, but were instead using the just-accepted pn
+    value.  This effectively meant that we *always* took the uncommitted
+    value; if there were multiples, which one we accepted depended on what
+    order the LAST messages arrived, not which pn the values were generated
+    under.
+    
+    The specific failure sequence I observed:
+    
+     - collect
+      - learned uncommitted value for 262 from myself
+      - send collect with pn 901
+     - got last with pn 901 (incorrect) for 200 (old) from peer
+      - discard our own value, remember the other
+     - finish collect phase
+      - ignore old uncommitted value
+    
+    Fix this by storing a pending_v and pending_pn value whenever we accept
+    a value.  Use this to send an appropriate pn value in the LAST reply
+    so that the leader can make it's decision about which uncommitted value
+    to accept based on accurate information.  Also use it when we learn
+    the uncommitted value from ourselves.
+    
+    We could probably be more clever about storing less information here,
+    for example by omitting pending_v and clearing pending_pn at the
+    appropriate point, but that would be more fragile.  Similarly, we could
+    store a pn for *every* commit if we wanted to lay some groundwork for
+    having multiple uncommitted proposals in flight, but I don't want to
+    speculate about what is necessary or sufficient for a correct solution
+    there.
+    
+    Fixes: #5698
+    Backport: cuttlefish, bobtail
+    Signed-off-by: Sage Weil <sage@inktank.com>
+    (cherry picked from commit 20baf662112dd5f560bc3a2d2114b469444c3de8)
+
+commit 18596340f020be1f21bdc9bcc752ae1da4a93a46
+Author: Sage Weil <sage@inktank.com>
+Date:   Sun Jul 21 08:12:46 2013 -0700
+
+    mon/Paxos: debug ignored uncommitted values
+    
+    Signed-off-by: Sage Weil <sage@inktank.com>
+    (cherry picked from commit 19b29788966eb80ed847630090a16a3d1b810969)
+
+commit f598245f1355d7791162c03d90bdd97b013e56f3
+Author: Sage Weil <sage@inktank.com>
+Date:   Sun Jul 21 08:11:22 2013 -0700
+
+    mon/Paxos: only learn uncommitted value if it is in the future
+    
+    If an older peer sends an uncommitted value, make sure we only take it
+    if it is in the future, and at least as new as any current uncommitted
+    value.
+    
+    (Prior to the previous patch, peers could send values from long-past
+    rounds.  The pn values are also bogus.)
+    
+    Signed-off-by: Sage Weil <sage@inktank.com>
+    (cherry picked from commit b3253a453c057914753846c77499f98d3845c58e)
+
+commit 732286a28cd8a643593d490a7a84a590d372f78d
+Author: Sage Weil <sage@inktank.com>
+Date:   Mon Jul 22 14:13:23 2013 -0700
+
+    mon/Paxos: only share uncommitted value if it is next
+    
+    We may have an uncommitted value from our perspective (it is our lc + 1)
+    when the collector has a much larger lc (because we have been out for
+    the last few rounds).  Only share an uncommitted value if it is in fact
+    the next value.
+    
+    Signed-off-by: Sage Weil <sage@inktank.com>
+    (cherry picked from commit b26b7f6e5e02ac6beb66e3e34e177e6448cf91cf)
diff --git a/doc/dev/repo-lab-access.rst b/doc/dev/repo-lab-access.rst
new file mode 100644
index 00000000000..706f02e395c
--- /dev/null
+++ b/doc/dev/repo-lab-access.rst
@@ -0,0 +1,88 @@
+Notes on Ceph repositories and test lab
+=======================================
+
+Special branches
+----------------
+
+* ``master'': current tip (integration branch)
+* ``next'': pending release (feature frozen, bugfixes only)
+* ``last'': last/previous release
+* ``dumpling'', ``cuttlefish'', ``bobtail'', ``argonaut'', etc.: stable release branches
+* ``dumpling-next'': backports for stable release, pending testing
+
+Rules
+-----
+
+The source repos are all on github.
+
+* Any branch pushed to ceph.git will kick off builds that will either
+  run unit tests or generate packages for gitbuilder.ceph.com.  Try
+  not to generate unnecessary load.  For private, unreviewed work,
+  only push to branches named ``wip-*''.  This avoids colliding with
+  any special branches.
+
+* Nothing should every reach a special branch unless it has been
+  reviewed.
+
+* Preferred means of review is via github pull requests to capture any
+  review discussion.
+
+* For multi-patch series, the pull request can be merged via github,
+  and a Reviewed-by: ... line added to the merge commit.
+
+* For single- (or few-) patch merges, it is preferable to add the
+  Reviewed-by: directly to the commit so that it is also visible when
+  the patch is cherry-picked for backports.
+
+* All backports should use ``git cherry-pick -x'' to capture which
+  commit they are cherry-picking from.
+
+
+Teuthology lab hardware
+-----------------------
+
+* 96 plana: 8 core, 4 disk, 1gig and 10gig networks.  Used for nightly runs.
+
+* 64 burnupi: Dell R515s: 6 core, 8 disk, 1gig and 10gig networks, moderate CPU.  crummy SAS expanders.
+
+* 120 mira: 8 core, 8 disks, 1gig network.  older hardware, flakier disks
+
+* 8 vercoi: 24 core, RAID; VM hosts
+
+* 4 senta: 25 core: VM hosts
+
+* 36 saya: Calxeda ARM7 nodes (for testing)
+
+* 24 tala: Calxeda ARM7 nodes (gitbuilder)
+
+* ~200 vps: VMs running on mira hardware
+
+Locking machines
+----------------
+
+* All tests pull their builds from gitbuilder.ceph.com.
+
+* Anybody can lock machines with ``teuthology-lock --lock-many NUM
+  --machine-type TYPE''.
+
+* Machines are locked as ``whoami''@``hostname -s''.  --owner to
+  choose otherwise.
+
+* Automated tests current run on the ``plana''; please avoid locking
+  these for personal use.
+
+* To unlock, please use ``teuthology-nuke -t list.yaml -r -u'', which
+  will reboot and clean up any leftover test state before unlocking
+  (or fail to unlock).  It looks for a ``targets::'' section in the
+  yaml, so the regular job yaml will work.  You can get a list of all
+  locked machines with ``teuthology-lock --list-targets''.
+
+* ``teuthology-lock -a --brief'' or ``teuthology-lock --summary'' to
+  see what is locked and by whom.
+
+* Be conscientious about scheduling entire qa runs.  Coordinate
+  utilization on IRC.  Make sure you are running the latest version
+  ceph-qa-suite.git and teuthology.git.
+
+* Results for scheduled runs appear in /a/$jobname on the teuthology
+  machine.  ``ls -alt | head'' to find them.
diff --git a/doc/man/8/ceph-rest-api.rst b/doc/man/8/ceph-rest-api.rst
index 8a87f97ce19..a000d09c676 100644
--- a/doc/man/8/ceph-rest-api.rst
+++ b/doc/man/8/ceph-rest-api.rst
@@ -7,7 +7,7 @@
 Synopsis
 ========
 
-| **ceph-rest-api** [ -c *conffile* ] [ -n *name* ... ]
+| **ceph-rest-api** [ -c *conffile* ] [--cluster *clustername* ] [ -n *name* ] [-i *id* ]
 
 
 Description
@@ -21,7 +21,7 @@ command-line tool through an HTTP-accessible interface.
 Options
 =======
 
-.. option:: -c/--conf *conffile*
+.. option:: -c/--conf conffile
 
     names the ceph.conf file to use for configuration.  If -c is not
     specified, the default depends on the state of the --cluster option
@@ -35,21 +35,26 @@ Options
   
     so you can also pass this option in the environment as CEPH_CONF.
 
-.. option:: --cluster *clustername*
+.. option:: --cluster clustername
 
     set *clustername* for use in the $cluster metavariable, for
     locating the ceph.conf file.  The default is 'ceph'.
-    You can also pass this option in the environment as
-    CEPH_CLUSTER_NAME.
 
-.. option:: -n/--name *name*
+.. option:: -n/--name name
 
     specifies the client 'name', which is used to find the
     client-specific configuration options in the config file, and
     also is the name used for authentication when connecting
     to the cluster (the entity name appearing in ceph auth list output,
-    for example).  The default is 'client.restapi'.  You can also
-    pass this option in the environment as CEPH_NAME.
+    for example).  The default is 'client.restapi'. 
+
+.. option:: -i/--id id
+
+   specifies the client 'id', which will form the clientname
+   as 'client.<id>' if clientname is not set.  If -n/-name is
+   set, that takes precedence.
+
+   Also, global Ceph options are supported.
  
 
 Configuration parameters
@@ -57,16 +62,22 @@ Configuration parameters
 
 Supported configuration parameters include:
 
-* **restapi client name** the 'clientname' used for auth and ceph.conf
-* **restapi keyring** the keyring file holding the key for 'clientname'
-* **restapi public addr** ip:port to listen on (default 0.0.0.0:5000)
+* **keyring** the keyring file holding the key for 'clientname'
+* **public addr** ip:port to listen on (default 0.0.0.0:5000)
+* **log file** (usual Ceph default)
 * **restapi base url** the base URL to answer requests on (default /api/v0.1)
-* **restapi log level** critical, error, warning, info, debug
-* **restapi log file** (default /var/local/ceph/<clientname>.log)
+* **restapi log level** critical, error, warning, info, debug (default warning)
+
+Configuration parameters are searched in the standard order:
+first in the section named '<clientname>', then 'client', then 'global'.
 
-A server will run on **restapi public addr** if the ceph-rest-api
-executed directly; otherwise, configuration is specified by the
-enclosing WSGI web server.
+<clientname> is either supplied by -n/--name, "client.<id>" where
+<id> is supplied by -i/--id, or 'client.restapi' if neither option
+is present.
+
+A single-threaded server will run on **public addr** if the ceph-rest-api
+executed directly; otherwise, configuration is specified by the enclosing
+WSGI web server.
 
 Commands
 ========
@@ -92,7 +103,9 @@ with a small description of each command, is provided when the requested
 path is incomplete/partially matching.  Requesting / will redirect to
 the value of  **restapi base url**, and that path will give a full list
 of all known commands.  The command set is very similar to the commands
-supported by the **ceph** tool.
+supported by the **ceph** tool.  One notable exception is that the
+``ceph pg <pgid> <command>`` style of commands is supported here
+as ``tell/<pgid>/command?args``.
 
 Deployment as WSGI application
 ==============================
@@ -101,18 +114,22 @@ When deploying as WSGI application (say, with Apache/mod_wsgi,
 or nginx/uwsgi, or gunicorn, etc.), use the ``ceph_rest_api.py`` module
 (``ceph-rest-api`` is a thin layer around this module).  The standalone web
 server is of course not used, so address/port configuration is done in
-the WSGI server.  Also, configuration switches are not passed; rather,
-environment variables are used:
-
-* CEPH_CONF holds -c/--conf
-* CEPH_CLUSTER_NAME holds --cluster
-* CEPH_NAME holds -n/--name
-
-Any errors reading configuration or connecting to the cluster cause
-ImportError to be raised with a descriptive message on import; see
-your WSGI server documentation for how to see those messages in case
-of problem.
-
+the WSGI server.  Use a python .wsgi module or the equivalent to call
+``app = generate_app(conf, cluster, clientname, clientid, args)`` where:
+
+* conf is as -c/--conf above
+* cluster is as --cluster above
+* clientname, -n/--name
+* clientid, -i/--id, and
+* args are any other generic Ceph arguments
+
+When app is returned, it will have attributes 'ceph_addr' and 'ceph_port'
+set to what the address and port are in the Ceph configuration;
+those may be used for the server, or ignored.
+
+Any errors reading configuration or connecting to the cluster cause an
+exception to be raised; see your WSGI server documentation for how to
+see those messages in case of problem.
 
 Availability
 ============
diff --git a/doc/release-notes.rst b/doc/release-notes.rst
index b71e2a0f1bf..9216ca9c192 100644
--- a/doc/release-notes.rst
+++ b/doc/release-notes.rst
@@ -2,7 +2,7 @@
  Release Notes
 ===============
 
-v0.67-rc1
+v0.67-rc2
 ---------
 
 This is a release candidate for v0.67, code-named "Dumpling."  Any
@@ -19,8 +19,8 @@ The headline features for this release include:
 
 * Object namespaces in librados.
 
-Upgrading from  v0.66
-~~~~~~~~~~~~~~~~~~~~~
+Upgrading from v0.66
+~~~~~~~~~~~~~~~~~~~~
 
 * There is monitor internal protocol change, which means that v0.67
   ceph-mon daemons cannot talk to v0.66 or older daemons.  We
@@ -287,6 +287,42 @@ Notable Changes
  * misc code cleanups
 
 
+v0.61.7 "Cuttlefish"
+--------------------
+
+This release fixes another regression preventing monitors to start after
+undergoing certain upgrade sequences, as well as some corner cases with
+Paxos and support for unusual device names in ceph-disk/ceph-deploy.
+
+Notable Changes
+~~~~~~~~~~~~~~~
+
+* mon: fix regression in latest full osdmap retrieval
+* mon: fix a long-standing bug in a paxos corner case
+* ceph-disk: improved support for unusual device names (e.g., /dev/cciss/c0d0)
+
+For more detailed information, see :download:`the complete changelog <changelog/v0.61.7.txt>`.
+
+
+v0.61.6 "Cuttlefish"
+--------------------
+
+This release fixes a regression in v0.61.5 that could prevent monitors
+from restarting.  This affects any cluster that was upgraded from a
+previous version of Ceph (and not freshly created with v0.61.5).
+
+All users are strongly recommended to upgrade.
+
+Notable Changes
+~~~~~~~~~~~~~~~
+
+* mon: record latest full osdmap
+* mon: work around previous bug in which latest full osdmap is not recorded
+* mon: avoid scrub while updating
+
+For more detailed information, see :download:`the complete changelog <changelog/v0.61.6.txt>`.
+
+
 v0.61.5 "Cuttlefish"
 --------------------
 
diff --git a/man/ceph-authtool.8 b/man/ceph-authtool.8
index 47888af1f22..e64cac95f0a 100644
--- a/man/ceph-authtool.8
+++ b/man/ceph-authtool.8
@@ -69,16 +69,33 @@ will create a new keyring, overwriting any existing keyringfile
 .UNINDENT
 .INDENT 0.0
 .TP
-.B \-\-gen\-key
+.B \-g, \-\-gen\-key
 will generate a new secret key for the specified entityname
 .UNINDENT
 .INDENT 0.0
 .TP
-.B \-\-add\-key
+.B \-a, \-\-add\-key
 will add an encoded key to the keyring
 .UNINDENT
 .INDENT 0.0
 .TP
+.B \-u, \-\-set\-uid
+sets the auid (authenticated user id) for the specified entityname
+.UNINDENT
+.INDENT 0.0
+.TP
+.B \-\-gen\-print\-key
+will generate and print a new secret key without adding it to the keyringfile
+
+NOTE: will work without a given keyringfile
+.UNINDENT
+.INDENT 0.0
+.TP
+.B \-\-import\-keyring
+will import the content of a given keyring into the keyringfile
+.UNINDENT
+.INDENT 0.0
+.TP
 .B \-\-cap subsystem capability
 will set the capability for given subsystem
 .UNINDENT
diff --git a/man/ceph-rest-api.8 b/man/ceph-rest-api.8
index 33425fecc00..5170b7f37cf 100644
--- a/man/ceph-rest-api.8
+++ b/man/ceph-rest-api.8
@@ -1,4 +1,4 @@
-.TH "CEPH-REST-API" "8" "July 12, 2013" "dev" "Ceph"
+.TH "CEPH-REST-API" "8" "July 26, 2013" "dev" "Ceph"
 .SH NAME
 ceph-rest-api \- ceph RESTlike administration server
 .
@@ -32,7 +32,7 @@ level margin: \\n[rst2man-indent\\n[rst2man-indent-level]]
 .
 .SH SYNOPSIS
 .nf
-\fBceph\-rest\-api\fP [ \-c \fIconffile\fP ] [ \-n \fIname\fP ... ]
+\fBceph\-rest\-api\fP [ \-c \fIconffile\fP ] [\-\-cluster \fIclustername\fP ] [ \-n \fIname\fP ] [\-i \fIid\fP ]
 .fi
 .sp
 .SH DESCRIPTION
@@ -44,7 +44,7 @@ command\-line tool through an HTTP\-accessible interface.
 .SH OPTIONS
 .INDENT 0.0
 .TP
-.B \-c/\-\-conf *conffile*
+.B \-c/\-\-conf conffile
 names the ceph.conf file to use for configuration.  If \-c is not
 specified, the default depends on the state of the \-\-cluster option
 (default \(aqceph\(aq; see below).  The configuration file is searched
@@ -64,43 +64,54 @@ so you can also pass this option in the environment as CEPH_CONF.
 .UNINDENT
 .INDENT 0.0
 .TP
-.B \-\-cluster *clustername*
+.B \-\-cluster clustername
 set \fIclustername\fP for use in the $cluster metavariable, for
 locating the ceph.conf file.  The default is \(aqceph\(aq.
-You can also pass this option in the environment as
-CEPH_CLUSTER_NAME.
 .UNINDENT
 .INDENT 0.0
 .TP
-.B \-n/\-\-name *name*
+.B \-n/\-\-name name
 specifies the client \(aqname\(aq, which is used to find the
 client\-specific configuration options in the config file, and
 also is the name used for authentication when connecting
 to the cluster (the entity name appearing in ceph auth list output,
-for example).  The default is \(aqclient.restapi\(aq.  You can also
-pass this option in the environment as CEPH_NAME.
+for example).  The default is \(aqclient.restapi\(aq.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B \-i/\-\-id id
+specifies the client \(aqid\(aq, which will form the clientname
+as \(aqclient.<id>\(aq if clientname is not set.  If \-n/\-name is
+set, that takes precedence.
+.sp
+Also, global Ceph options are supported.
 .UNINDENT
 .SH CONFIGURATION PARAMETERS
 .sp
 Supported configuration parameters include:
 .INDENT 0.0
 .IP \(bu 2
-\fBrestapi client name\fP the \(aqclientname\(aq used for auth and ceph.conf
+\fBkeyring\fP the keyring file holding the key for \(aqclientname\(aq
 .IP \(bu 2
-\fBrestapi keyring\fP the keyring file holding the key for \(aqclientname\(aq
+\fBpublic addr\fP ip:port to listen on (default 0.0.0.0:5000)
 .IP \(bu 2
-\fBrestapi public addr\fP ip:port to listen on (default 0.0.0.0:5000)
+\fBlog file\fP (usual Ceph default)
 .IP \(bu 2
 \fBrestapi base url\fP the base URL to answer requests on (default /api/v0.1)
 .IP \(bu 2
-\fBrestapi log level\fP critical, error, warning, info, debug
-.IP \(bu 2
-\fBrestapi log file\fP (default /var/local/ceph/<clientname>.log)
+\fBrestapi log level\fP critical, error, warning, info, debug (default warning)
 .UNINDENT
 .sp
-A server will run on \fBrestapi public addr\fP if the ceph\-rest\-api
-executed directly; otherwise, configuration is specified by the
-enclosing WSGI web server.
+Configuration parameters are searched in the standard order:
+first in the section named \(aq<clientname>\(aq, then \(aqclient\(aq, then \(aqglobal\(aq.
+.sp
+<clientname> is either supplied by \-n/\-\-name, "client.<id>" where
+<id> is supplied by \-i/\-\-id, or \(aqclient.restapi\(aq if neither option
+is present.
+.sp
+A single\-threaded server will run on \fBpublic addr\fP if the ceph\-rest\-api
+executed directly; otherwise, configuration is specified by the enclosing
+WSGI web server.
 .SH COMMANDS
 .sp
 Commands are submitted with HTTP GET requests (for commands that
@@ -122,28 +133,37 @@ with a small description of each command, is provided when the requested
 path is incomplete/partially matching.  Requesting / will redirect to
 the value of  \fBrestapi base url\fP, and that path will give a full list
 of all known commands.  The command set is very similar to the commands
-supported by the \fBceph\fP tool.
+supported by the \fBceph\fP tool.  One notable exception is that the
+\fBceph pg <pgid> <command>\fP style of commands is supported here
+as \fBtell/<pgid>/command?args\fP.
 .SH DEPLOYMENT AS WSGI APPLICATION
 .sp
 When deploying as WSGI application (say, with Apache/mod_wsgi,
 or nginx/uwsgi, or gunicorn, etc.), use the \fBceph_rest_api.py\fP module
 (\fBceph\-rest\-api\fP is a thin layer around this module).  The standalone web
 server is of course not used, so address/port configuration is done in
-the WSGI server.  Also, configuration switches are not passed; rather,
-environment variables are used:
+the WSGI server.  Use a python .wsgi module or the equivalent to call
+\fBapp = generate_app(conf, cluster, clientname, clientid, args)\fP where:
 .INDENT 0.0
 .IP \(bu 2
-CEPH_CONF holds \-c/\-\-conf
+conf is as \-c/\-\-conf above
+.IP \(bu 2
+cluster is as \-\-cluster above
 .IP \(bu 2
-CEPH_CLUSTER_NAME holds \-\-cluster
+clientname, \-n/\-\-name
 .IP \(bu 2
-CEPH_NAME holds \-n/\-\-name
+clientid, \-i/\-\-id, and
+.IP \(bu 2
+args are any other generic Ceph arguments
 .UNINDENT
 .sp
-Any errors reading configuration or connecting to the cluster cause
-ImportError to be raised with a descriptive message on import; see
-your WSGI server documentation for how to see those messages in case
-of problem.
+When app is returned, it will have attributes \(aqceph_addr\(aq and \(aqceph_port\(aq
+set to what the address and port are in the Ceph configuration;
+those may be used for the server, or ignored.
+.sp
+Any errors reading configuration or connecting to the cluster cause an
+exception to be raised; see your WSGI server documentation for how to
+see those messages in case of problem.
 .SH AVAILABILITY
 .sp
 \fBceph\-rest\-api\fP is part of the Ceph distributed file system. Please refer to the Ceph documentation at
diff --git a/qa/fs/.gitignore b/qa/fs/.gitignore
new file mode 100644
index 00000000000..a2d280e0794
--- /dev/null
+++ b/qa/fs/.gitignore
@@ -0,0 +1 @@
+/test_o_trunc
diff --git a/qa/workunits/rest/test.py b/qa/workunits/rest/test.py
index 272760c8289..f0cf1f2c761 100755
--- a/qa/workunits/rest/test.py
+++ b/qa/workunits/rest/test.py
@@ -1,8 +1,8 @@
 #!/usr/bin/python
 
 import exceptions
+import json
 import os
-# import nosetests
 import requests
 import subprocess
 import sys
@@ -43,9 +43,10 @@ def expect(url, method, respcode, contenttype, extra_hdrs=None, data=None):
 
     if contenttype.startswith('application'):
         if r_contenttype == 'application/json':
-            # may raise
             try:
-                assert(r.json != None)
+                # older requests.py doesn't create r.myjson; create it myself
+                r.myjson = json.loads(r.content)
+                assert(r.myjson != None)
             except Exception as e:
                 fail(r, 'Invalid JSON returned: "{0}"'.format(str(e)))
 
@@ -71,7 +72,7 @@ if __name__ == '__main__':
     expect('auth/export', 'GET', 200, 'xml', XMLHDR)
 
     expect('auth/add?entity=client.xx&'
-           'caps=mon&caps=allow&caps=osd&caps=allow *', 'PUT', 200, 'json',
+           'caps=mon&caps=allow&caps=osd&caps=allow+*', 'PUT', 200, 'json',
             JSONHDR)
 
     r = expect('auth/export?entity=client.xx', 'GET', 200, 'plain')
@@ -83,7 +84,7 @@ if __name__ == '__main__':
     assert('client.xx' in r.content)
 
     r = expect('auth/list.json', 'GET', 200, 'json')
-    dictlist = r.json['output']['auth_dump']
+    dictlist = r.myjson['output']['auth_dump']
     xxdict = [d for d in dictlist if d['entity'] == 'client.xx'][0]
     assert(xxdict)
     assert('caps' in xxdict)
@@ -94,10 +95,10 @@ if __name__ == '__main__':
     expect('auth/print-key?entity=client.xx', 'GET', 200, 'json', JSONHDR)
     expect('auth/print_key?entity=client.xx', 'GET', 200, 'json', JSONHDR)
 
-    expect('auth/caps?entity=client.xx&caps=osd&caps=allow rw', 'PUT', 200,
+    expect('auth/caps?entity=client.xx&caps=osd&caps=allow+rw', 'PUT', 200,
            'json', JSONHDR)
     r = expect('auth/list.json', 'GET', 200, 'json')
-    dictlist = r.json['output']['auth_dump']
+    dictlist = r.myjson['output']['auth_dump']
     xxdict = [d for d in dictlist if d['entity'] == 'client.xx'][0]
     assert(xxdict)
     assert('caps' in xxdict)
@@ -116,7 +117,7 @@ if __name__ == '__main__':
     expect('auth/del?entity=client.xx', 'PUT', 200, 'json', JSONHDR)
 
     r = expect('osd/dump', 'GET', 200, 'json', JSONHDR)
-    assert('epoch' in r.json['output'])
+    assert('epoch' in r.myjson['output'])
 
     assert('GLOBAL' in expect('df', 'GET', 200, 'plain').content)
     assert('CATEGORY' in expect('df?detail=detail', 'GET', 200, 'plain').content)
@@ -124,12 +125,12 @@ if __name__ == '__main__':
     assert('CATEGORY' in expect('df?detail', 'GET', 200, 'plain').content)
 
     r = expect('df', 'GET', 200, 'json', JSONHDR)
-    assert('total_used' in r.json['output']['stats'])
+    assert('total_used' in r.myjson['output']['stats'])
     r = expect('df', 'GET', 200, 'xml', XMLHDR)
     assert(r.tree.find('output/stats/stats/total_used') is not None)
 
     r = expect('df?detail', 'GET', 200, 'json', JSONHDR)
-    assert('rd_kb' in r.json['output']['pools'][0]['stats'])
+    assert('rd_kb' in r.myjson['output']['pools'][0]['stats'])
     r = expect('df?detail', 'GET', 200, 'xml', XMLHDR)
     assert(r.tree.find('output/stats/pools/pool/stats/rd_kb') is not None)
 
@@ -149,7 +150,7 @@ if __name__ == '__main__':
     expect('mds/compat/rm_incompat?feature=4', 'PUT', 200, '')
 
     r = expect('mds/compat/show', 'GET', 200, 'json', JSONHDR)
-    assert('incompat' in r.json['output'])
+    assert('incompat' in r.myjson['output'])
     r = expect('mds/compat/show', 'GET', 200, 'xml', XMLHDR)
     assert(r.tree.find('output/mds_compat/incompat') is not None)
 
@@ -157,8 +158,8 @@ if __name__ == '__main__':
     expect('mds/deactivate?who=2', 'PUT', 400, '')
 
     r = expect('mds/dump.json', 'GET', 200, 'json')
-    assert('created' in r.json['output'])
-    current_epoch = r.json['output']['epoch']
+    assert('created' in r.myjson['output'])
+    current_epoch = r.myjson['output']['epoch']
     r = expect('mds/dump.xml', 'GET', 200, 'xml')
     assert(r.tree.find('output/mdsmap/created') is not None)
 
@@ -171,7 +172,7 @@ if __name__ == '__main__':
            200, '')
     expect('osd/pool/create?pool=data2&pg_num=10', 'PUT', 200, '')
     r = expect('osd/dump', 'GET', 200, 'json', JSONHDR)
-    pools = r.json['output']['pools']
+    pools = r.myjson['output']['pools']
     poolnum = None
     for p in pools:
         if p['pool_name'] == 'data2':
@@ -185,12 +186,12 @@ if __name__ == '__main__':
            '&sure=--yes-i-really-really-mean-it', 'PUT', 200, '')
     expect('mds/set_max_mds?maxmds=4', 'PUT', 200, '')
     r = expect('mds/dump.json', 'GET', 200, 'json')
-    assert(r.json['output']['max_mds'] == 4)
+    assert(r.myjson['output']['max_mds'] == 4)
     expect('mds/set_max_mds?maxmds=3', 'PUT', 200, '')
     r = expect('mds/stat.json', 'GET', 200, 'json')
-    assert('info' in r.json['output'])
+    assert('info' in r.myjson['output']['mdsmap'])
     r = expect('mds/stat.xml', 'GET', 200, 'xml')
-    assert(r.tree.find('output/mdsmap/info') is not None)
+    assert(r.tree.find('output/mds_stat/mdsmap/info') is not None)
 
     # more content tests below, just check format here
     expect('mon/dump.json', 'GET', 200, 'json')
@@ -199,17 +200,17 @@ if __name__ == '__main__':
     r = expect('mon/getmap', 'GET', 200, '')
     assert(len(r.content) != 0)
     r = expect('mon_status.json', 'GET', 200, 'json')
-    assert('name' in r.json['output'])
+    assert('name' in r.myjson['output'])
     r = expect('mon_status.xml', 'GET', 200, 'xml')
     assert(r.tree.find('output/mon_status/name') is not None)
 
     bl = '192.168.0.1:0/1000'
     expect('osd/blacklist?blacklistop=add&addr=' + bl, 'PUT', 200, '')
     r = expect('osd/blacklist/ls.json', 'GET', 200, 'json')
-    assert([b for b in r.json['output'] if b['addr'] == bl])
+    assert([b for b in r.myjson['output'] if b['addr'] == bl])
     expect('osd/blacklist?blacklistop=rm&addr=' + bl, 'PUT', 200, '')
     r = expect('osd/blacklist/ls.json', 'GET', 200, 'json')
-    assert([b for b in r.json['output'] if b['addr'] == bl] == [])
+    assert([b for b in r.myjson['output'] if b['addr'] == bl] == [])
 
     expect('osd/crush/tunables?profile=legacy', 'PUT', 200, '')
     expect('osd/crush/tunables?profile=bobtail', 'PUT', 200, '')
@@ -222,73 +223,73 @@ if __name__ == '__main__':
 
     expect('osd/down?ids=0', 'PUT', 200, '')
     r = expect('osd/dump', 'GET', 200, 'json', JSONHDR)
-    assert(r.json['output']['osds'][0]['osd'] == 0)
-    assert(r.json['output']['osds'][0]['up'] == 0)
+    assert(r.myjson['output']['osds'][0]['osd'] == 0)
+    assert(r.myjson['output']['osds'][0]['up'] == 0)
 
     expect('osd/unset?key=noup', 'PUT', 200, '')
 
     for i in range(0,100):
         r = expect('osd/dump', 'GET', 200, 'json', JSONHDR)
-        assert(r.json['output']['osds'][0]['osd'] == 0)
-        if r.json['output']['osds'][0]['up'] == 1:
+        assert(r.myjson['output']['osds'][0]['osd'] == 0)
+        if r.myjson['output']['osds'][0]['up'] == 1:
             break
         else:
             print >> sys.stderr, "waiting for osd.0 to come back up"
             time.sleep(10)
 
     r = expect('osd/dump', 'GET', 200, 'json', JSONHDR)
-    assert(r.json['output']['osds'][0]['osd'] == 0)
-    assert(r.json['output']['osds'][0]['up'] == 1)
+    assert(r.myjson['output']['osds'][0]['osd'] == 0)
+    assert(r.myjson['output']['osds'][0]['up'] == 1)
 
     r = expect('osd/find?id=1', 'GET', 200, 'json', JSONHDR)
-    assert(r.json['output']['osd'] == 1)
+    assert(r.myjson['output']['osd'] == 1)
 
     expect('osd/out?ids=1', 'PUT', 200, '')
     r = expect('osd/dump', 'GET', 200, 'json', JSONHDR)
-    assert(r.json['output']['osds'][1]['osd'] == 1)
-    assert(r.json['output']['osds'][1]['in'] == 0)
+    assert(r.myjson['output']['osds'][1]['osd'] == 1)
+    assert(r.myjson['output']['osds'][1]['in'] == 0)
 
     expect('osd/in?ids=1', 'PUT', 200, '')
     r = expect('osd/dump', 'GET', 200, 'json', JSONHDR)
-    assert(r.json['output']['osds'][1]['osd'] == 1)
-    assert(r.json['output']['osds'][1]['in'] == 1)
+    assert(r.myjson['output']['osds'][1]['osd'] == 1)
+    assert(r.myjson['output']['osds'][1]['in'] == 1)
 
     r = expect('osd/find?id=0', 'GET', 200, 'json', JSONHDR)
-    assert(r.json['output']['osd'] == 0)
+    assert(r.myjson['output']['osd'] == 0)
 
     r = expect('osd/getmaxosd', 'GET', 200, 'xml', XMLHDR)
     assert(r.tree.find('output/getmaxosd/max_osd') is not None)
     r = expect('osd/getmaxosd', 'GET', 200, 'json', JSONHDR)
-    saved_maxosd = r.json['output']['max_osd']
+    saved_maxosd = r.myjson['output']['max_osd']
     expect('osd/setmaxosd?newmax=10', 'PUT', 200, '')
     r = expect('osd/getmaxosd', 'GET', 200, 'json', JSONHDR)
-    assert(r.json['output']['max_osd'] == 10)
+    assert(r.myjson['output']['max_osd'] == 10)
     expect('osd/setmaxosd?newmax={0}'.format(saved_maxosd), 'PUT', 200, '')
     r = expect('osd/getmaxosd', 'GET', 200, 'json', JSONHDR)
-    assert(r.json['output']['max_osd'] == saved_maxosd)
+    assert(r.myjson['output']['max_osd'] == saved_maxosd)
 
     r = expect('osd/create', 'PUT', 200, 'json', JSONHDR)
-    assert('osdid' in r.json['output'])
-    osdid = r.json['output']['osdid']
+    assert('osdid' in r.myjson['output'])
+    osdid = r.myjson['output']['osdid']
     expect('osd/lost?id={0}'.format(osdid), 'PUT', 400, '')
     expect('osd/lost?id={0}&sure=--yes-i-really-mean-it'.format(osdid),
            'PUT', 200, 'json', JSONHDR)
     expect('osd/rm?ids={0}'.format(osdid), 'PUT', 200, '')
     r = expect('osd/ls', 'GET', 200, 'json', JSONHDR)
-    assert(isinstance(r.json['output'], list))
+    assert(isinstance(r.myjson['output'], list))
     r = expect('osd/ls', 'GET', 200, 'xml', XMLHDR)
     assert(r.tree.find('output/osds/osd') is not None)
 
 
     expect('osd/pause', 'PUT', 200, '')
     r = expect('osd/dump', 'GET', 200, 'json', JSONHDR)
-    assert('pauserd,pausewr' in r.json['output']['flags'])
+    assert('pauserd,pausewr' in r.myjson['output']['flags'])
     expect('osd/unpause', 'PUT', 200, '')
     r = expect('osd/dump', 'GET', 200, 'json', JSONHDR)
-    assert('pauserd,pausewr' not in r.json['output']['flags'])
+    assert('pauserd,pausewr' not in r.myjson['output']['flags'])
 
     r = expect('osd/tree', 'GET', 200, 'json', JSONHDR)
-    assert('nodes' in r.json['output'])
+    assert('nodes' in r.myjson['output'])
     r = expect('osd/tree', 'GET', 200, 'xml', XMLHDR)
     assert(r.tree.find('output/tree/nodes') is not None)
     expect('osd/pool/mksnap?pool=data&snap=datasnap', 'PUT', 200, '')
@@ -297,21 +298,21 @@ if __name__ == '__main__':
     expect('osd/pool/rmsnap?pool=data&snap=datasnap', 'PUT', 200, '')
 
     expect('osd/pool/create?pool=data2&pg_num=10', 'PUT', 200, '')
-    r = expect('osd/lspools', 'PUT', 200, 'json', JSONHDR)
-    assert([p for p in r.json['output'] if p['poolname'] == 'data2'])
+    r = expect('osd/lspools', 'GET', 200, 'json', JSONHDR)
+    assert([p for p in r.myjson['output'] if p['poolname'] == 'data2'])
     expect('osd/pool/rename?srcpool=data2&destpool=data3', 'PUT', 200, '')
-    r = expect('osd/lspools', 'PUT', 200, 'json', JSONHDR)
-    assert([p for p in r.json['output'] if p['poolname'] == 'data3'])
+    r = expect('osd/lspools', 'GET', 200, 'json', JSONHDR)
+    assert([p for p in r.myjson['output'] if p['poolname'] == 'data3'])
     expect('osd/pool/delete?pool=data3', 'PUT', 400, '')
     expect('osd/pool/delete?pool=data3&pool2=data3&sure=--yes-i-really-really-mean-it', 'PUT', 200, '')
 
     r = expect('osd/stat', 'GET', 200, 'json', JSONHDR)
-    assert('num_up_osds' in r.json['output'])
+    assert('num_up_osds' in r.myjson['output'])
     r = expect('osd/stat', 'GET', 200, 'xml', XMLHDR)
     assert(r.tree.find('output/osdmap/num_up_osds') is not None)
 
     r = expect('osd/ls', 'GET', 200, 'json', JSONHDR)
-    for osdid in r.json['output']:
+    for osdid in r.myjson['output']:
         # XXX no tell yet
         # expect('tell?target=osd.{0}&args=version'.format(osdid), 'PUT',
         #         200, '')
@@ -321,7 +322,7 @@ if __name__ == '__main__':
     expect('pg/debug?debugop=degraded_pgs_exist', 'GET', 200, '')
     expect('pg/deep-scrub?pgid=0.0', 'PUT', 200, '')
     r = expect('pg/dump', 'GET', 200, 'json', JSONHDR)
-    assert('pg_stats_sum' in r.json['output'])
+    assert('pg_stats_sum' in r.myjson['output'])
     r = expect('pg/dump', 'GET', 200, 'xml', XMLHDR)
     assert(r.tree.find('output/pg_map/pg_stats_sum') is not None)
 
@@ -335,8 +336,8 @@ if __name__ == '__main__':
     assert(len(r.content) != 0)
 
     r = expect('pg/map?pgid=0.0', 'GET', 200, 'json', JSONHDR)
-    assert('acting' in r.json['output'])
-    assert(r.json['output']['pgid'] == '0.0')
+    assert('acting' in r.myjson['output'])
+    assert(r.myjson['output']['pgid'] == '0.0')
     r = expect('pg/map?pgid=0.0', 'GET', 200, 'xml', XMLHDR)
     assert(r.tree.find('output/pg_map/acting') is not None)
     assert(r.tree.find('output/pg_map/pgid').text == '0.0')
@@ -348,15 +349,15 @@ if __name__ == '__main__':
 
     expect('pg/set_full_ratio?ratio=0.90', 'PUT', 200, '')
     r = expect('pg/dump', 'GET', 200, 'json', JSONHDR)
-    assert(float(r.json['output']['full_ratio']) == 0.90)
+    assert(float(r.myjson['output']['full_ratio']) == 0.90)
     expect('pg/set_full_ratio?ratio=0.95', 'PUT', 200, '')
     expect('pg/set_nearfull_ratio?ratio=0.90', 'PUT', 200, '')
     r = expect('pg/dump', 'GET', 200, 'json', JSONHDR)
-    assert(float(r.json['output']['near_full_ratio']) == 0.90)
+    assert(float(r.myjson['output']['near_full_ratio']) == 0.90)
     expect('pg/set_full_ratio?ratio=0.85', 'PUT', 200, '')
 
     r = expect('pg/stat', 'GET', 200, 'json', JSONHDR)
-    assert('pg_stats_sum' in r.json['output'])
+    assert('pg_stats_sum' in r.myjson['output'])
     r = expect('pg/stat', 'GET', 200, 'xml', XMLHDR)
     assert(r.tree.find('output/pg_map/pg_stats_sum') is not None)
 
@@ -367,12 +368,12 @@ if __name__ == '__main__':
 
     # report's CRC needs to be handled
     # r = expect('report', 'GET', 200, 'json', JSONHDR)
-    # assert('osd_stats' in r.json['output'])
+    # assert('osd_stats' in r.myjson['output'])
     # r = expect('report', 'GET', 200, 'xml', XMLHDR)
     # assert(r.tree.find('output/report/osdmap') is not None)
 
     r = expect('status', 'GET', 200, 'json', JSONHDR)
-    assert('osdmap' in r.json['output'])
+    assert('osdmap' in r.myjson['output'])
     r = expect('status', 'GET', 200, 'xml', XMLHDR)
     assert(r.tree.find('output/status/osdmap') is not None)
 
@@ -393,21 +394,21 @@ if __name__ == '__main__':
     for v in ['pg_num', 'pgp_num', 'size', 'min_size', 'crash_replay_interval',
               'crush_ruleset']:
         r = expect('osd/pool/get.json?pool=data&var=' + v, 'GET', 200, 'json')
-        assert(v in r.json['output'])
+        assert(v in r.myjson['output'])
 
     r = expect('osd/pool/get.json?pool=data&var=size', 'GET', 200, 'json')
-    assert(r.json['output']['size'] == 2)
+    assert(r.myjson['output']['size'] == 2)
 
     expect('osd/pool/set?pool=data&var=size&val=3', 'PUT', 200, 'plain')
     r = expect('osd/pool/get.json?pool=data&var=size', 'GET', 200, 'json')
-    assert(r.json['output']['size'] == 3)
+    assert(r.myjson['output']['size'] == 3)
 
     expect('osd/pool/set?pool=data&var=size&val=2', 'PUT', 200, 'plain')
     r = expect('osd/pool/get.json?pool=data&var=size', 'GET', 200, 'json')
-    assert(r.json['output']['size'] == 2)
+    assert(r.myjson['output']['size'] == 2)
 
     r = expect('osd/pool/get.json?pool=rbd&var=crush_ruleset', 'GET', 200, 'json')
-    assert(r.json['output']['crush_ruleset'] == 2)
+    assert(r.myjson['output']['crush_ruleset'] == 2)
 
     expect('osd/thrash?num_epochs=10', 'PUT', 200, '')
     print 'OK'
diff --git a/qa/workunits/suites/fsync-tester.sh b/qa/workunits/suites/fsync-tester.sh
index bdb7e583846..b056e3be0d6 100755
--- a/qa/workunits/suites/fsync-tester.sh
+++ b/qa/workunits/suites/fsync-tester.sh
@@ -6,3 +6,6 @@ wget http://ceph.com/qa/fsync-tester.c
 gcc fsync-tester.c -o fsync-tester
 
 ./fsync-tester
+
+lsof
+
diff --git a/src/Makefile.am b/src/Makefile.am
index 307abf2f965..93f33312508 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -504,7 +504,7 @@ ceph_radosacl_SOURCES = radosacl.cc
 ceph_radosacl_LDADD = librados.la $(PTHREAD_LIBS) -lm $(CRYPTO_LIBS) $(EXTRALIBS)
 bin_DEBUGPROGRAMS += ceph_scratchtool ceph_scratchtoolpp ceph_radosacl
 
-rbd_SOURCES = rbd.cc common/fiemap.cc common/secret.c common/TextTable.cc common/util.cc
+rbd_SOURCES = rbd.cc common/secret.c common/TextTable.cc common/util.cc
 rbd_CXXFLAGS = ${AM_CXXFLAGS}
 rbd_LDADD = librbd.la librados.la $(LIBGLOBAL_LDA) -lkeyutils
 if LINUX
@@ -762,6 +762,11 @@ unittest_addrs_CXXFLAGS = ${AM_CXXFLAGS} ${UNITTEST_CXXFLAGS}
 unittest_addrs_LDADD = $(LIBGLOBAL_LDA) ${UNITTEST_LDADD}
 check_PROGRAMS += unittest_addrs
 
+unittest_sharedptr_registry_SOURCES = test/common/test_sharedptr_registry.cc
+unittest_sharedptr_registry_CXXFLAGS = ${AM_CXXFLAGS} ${UNITTEST_CXXFLAGS}
+unittest_sharedptr_registry_LDADD = libcommon.la ${LIBGLOBAL_LDA} ${UNITTEST_LDADD}
+check_PROGRAMS += unittest_sharedptr_registry
+
 unittest_util_SOURCES = test/common/test_util.cc common/util.cc
 unittest_util_CXXFLAGS = ${AM_CXXFLAGS} ${UNITTEST_CXXFLAGS}
 unittest_util_LDADD = libcommon.la $(PTHREAD_LIBS) -lm ${UNITTEST_LDADD} $(CRYPTO_LIBS) $(EXTRALIBS)
@@ -1895,7 +1900,6 @@ noinst_HEADERS = \
         include/encoding.h\
         include/err.h\
         include/error.h\
-        include/fiemap.h\
         include/filepath.h\
         include/frag.h\
         include/hash.h\
diff --git a/src/ceph-disk b/src/ceph-disk
index db988b0d5e3..77a9d9a2612 100755
--- a/src/ceph-disk
+++ b/src/ceph-disk
@@ -182,7 +182,7 @@ class FilesystemTypeError(Error):
 
 def maybe_mkdir(*a, **kw):
     """
-    Creates a new directory if it doesn't exist, removes 
+    Creates a new directory if it doesn't exist, removes
     existing symlink before creating the directory.
     """
     # remove any symlink, if it is there..
@@ -495,7 +495,7 @@ def _check_output(*args, **kwargs):
 
 def get_conf(cluster, variable):
     """
-    Get the value of the given configuration variable from the 
+    Get the value of the given configuration variable from the
     cluster.
 
     :raises: Error if call to ceph-conf fails.
@@ -654,7 +654,7 @@ def mount(
     options,
     ):
     """
-    Mounts a device with given filessystem type and 
+    Mounts a device with given filessystem type and
     mount options to a tempfile path under /var/lib/ceph/tmp.
     """
     # pick best-of-breed mount options based on fs type
@@ -1307,18 +1307,37 @@ def auth_key(
     osd_id,
     keyring,
     ):
-    subprocess.check_call(
-        args=[
-            '/usr/bin/ceph',
-            '--cluster', cluster,
-            '--name', 'client.bootstrap-osd',
-            '--keyring', keyring,
-            'auth', 'add', 'osd.{osd_id}'.format(osd_id=osd_id),
-            '-i', os.path.join(path, 'keyring'),
-            'osd', 'allow *',
-            'mon', 'allow rwx',
-            ],
-        )
+    try:
+        # try dumpling+ cap scheme
+        subprocess.check_call(
+            args=[
+                '/usr/bin/ceph',
+                '--cluster', cluster,
+                '--name', 'client.bootstrap-osd',
+                '--keyring', keyring,
+                'auth', 'add', 'osd.{osd_id}'.format(osd_id=osd_id),
+                '-i', os.path.join(path, 'keyring'),
+                'osd', 'allow *',
+                'mon', 'allow profile osd',
+                ],
+            )
+    except subprocess.CalledProcessError as err:
+        if err.errno == errno.EACCES:
+            # try old cap scheme
+            subprocess.check_call(
+                args=[
+                    '/usr/bin/ceph',
+                    '--cluster', cluster,
+                    '--name', 'client.bootstrap-osd',
+                    '--keyring', keyring,
+                    'auth', 'add', 'osd.{osd_id}'.format(osd_id=osd_id),
+                    '-i', os.path.join(path, 'keyring'),
+                    'osd', 'allow *',
+                    'mon', 'allow rwx',
+                ],
+                )
+        else:
+            raise
 
 
 def move_mount(
@@ -1698,7 +1717,7 @@ def main_activate(args):
                 )
         else:
             raise Error('%s is not a directory or block device', args.path)
-    
+
         start_daemon(
             cluster=cluster,
             osd_id=osd_id,
@@ -1951,7 +1970,7 @@ def list_dev(dev, uuid_map, journal_map):
 
     print '%s%s %s' % (prefix, dev, ', '.join(desc))
 
-    
+
 
 def main_list(args):
     partmap = list_all_partitions()
@@ -1960,7 +1979,7 @@ def main_list(args):
     journal_map = {}
     for base, parts in sorted(partmap.iteritems()):
         for p in parts:
-            dev = '/dev/' + p
+            dev = get_dev_path(p)
             part_uuid = get_partition_uuid(dev)
             if part_uuid:
                 uuid_map[part_uuid] = dev
@@ -1980,11 +1999,11 @@ def main_list(args):
 
     for base, parts in sorted(partmap.iteritems()):
         if parts:
-            print '/dev/%s :' % base
+            print '%s :' % get_dev_path(base)
             for p in sorted(parts):
-                list_dev('/dev/' + p, uuid_map, journal_map)
+                list_dev(get_dev_path(p), uuid_map, journal_map)
         else:
-            list_dev('/dev/' + base, uuid_map, journal_map)
+            list_dev(get_dev_path(base), uuid_map, journal_map)
 
 
 ###########################
diff --git a/src/ceph-rest-api b/src/ceph-rest-api
index a44919acd6f..ae5245b4f76 100755
--- a/src/ceph-rest-api
+++ b/src/ceph-rest-api
@@ -33,34 +33,35 @@ def parse_args():
     parser.add_argument('-c', '--conf', help='Ceph configuration file')
     parser.add_argument('--cluster', help='Ceph cluster name')
     parser.add_argument('-n', '--name', help='Ceph client name')
+    parser.add_argument('-i', '--id', help='Ceph client id')
 
-    return parser.parse_args()
-
+    return parser.parse_known_args()
 
 # main
 
-parsed_args = parse_args()
-if parsed_args.conf:
-    os.environ['CEPH_CONF'] = parsed_args.conf
-if parsed_args.cluster:
-    os.environ['CEPH_CLUSTER_NAME'] = parsed_args.cluster
-if parsed_args.name:
-    os.environ['CEPH_NAME'] = parsed_args.name
+parsed_args, rest = parse_args()
 
 # import now that env vars are available to imported module
 
 try:
     import ceph_rest_api
-except Exception as e:
+except EnvironmentError as e:
     print >> sys.stderr, "Error importing ceph_rest_api: ", str(e)
     sys.exit(1)
 
-# importing ceph_rest_api has set module globals 'app', 'addr', and 'port' 
+# let other exceptions generate traceback
+
+app = ceph_rest_api.generate_app(
+    parsed_args.conf,
+    parsed_args.cluster,
+    parsed_args.name,
+    parsed_args.id,
+    rest,
+)
 
 files = [os.path.split(fr[1])[-1] for fr in inspect.stack()]
 if 'pdb.py' in files:
-    ceph_rest_api.app.run(host=ceph_rest_api.addr, port=ceph_rest_api.port,
-                          debug=True, use_reloader=False, use_debugger=False)
+    app.run(host=app.ceph_addr, port=app.ceph_port,
+            debug=True, use_reloader=False, use_debugger=False)
 else:
-    ceph_rest_api.app.run(host=ceph_rest_api.addr, port=ceph_rest_api.port,
-                          debug=True)
+    app.run(host=app.ceph_addr, port=app.ceph_port)
diff --git a/src/ceph.in b/src/ceph.in
index e6806786e7e..22c54a48689 100755
--- a/src/ceph.in
+++ b/src/ceph.in
@@ -64,6 +64,9 @@ cluster_handle = None
 
 def osdids():
     ret, outbuf, outs = json_command(cluster_handle, prefix='osd ls')
+    if ret == -errno.EINVAL:
+        # try old mon
+        ret, outbuf, outs = send_command(cluster_handle, cmd=['osd', 'ls'])
     if ret:
         raise RuntimeError('Can\'t contact mon for osd list')
     return [i for i in outbuf.split('\n') if i != '']
@@ -71,6 +74,10 @@ def osdids():
 def monids():
     ret, outbuf, outs = json_command(cluster_handle, prefix='mon dump',
                                      argdict={'format':'json'})
+    if ret == -errno.EINVAL:
+        # try old mon
+        ret, outbuf, outs = send_command(cluster_handle,
+                                         cmd=['mon', 'dump', '--format=json'])
     if ret:
         raise RuntimeError('Can\'t contact mon for mon list')
     d = json.loads(outbuf)
@@ -79,6 +86,10 @@ def monids():
 def mdsids():
     ret, outbuf, outs = json_command(cluster_handle, prefix='mds dump',
                                      argdict={'format':'json'})
+    if ret == -errno.EINVAL:
+        # try old mon
+        ret, outbuf, outs = send_command(cluster_handle,
+                                         cmd=['mds', 'dump', '--format=json'])
     if ret:
         raise RuntimeError('Can\'t contact mon for mds list')
     d = json.loads(outbuf)
@@ -100,8 +111,6 @@ def parse_cmdargs(args=None, target=''):
 
     parser.add_argument('-h', '--help', help='request mon help',
                         action='store_true')
-    parser.add_argument('--help-all', help='request help for all daemons',
-                        action='store_true')
 
     parser.add_argument('-c', '--conf', dest='cephconf',
                         help='ceph configuration file')
@@ -150,14 +159,16 @@ def parse_cmdargs(args=None, target=''):
 
     return parser, parsed_args, extras
 
-def do_help(parser, args, help_all = False):
+def do_help(parser, args):
     """
     Print basic parser help
-    If the cluster is available:
-        get and print monitor help; 
-        if help_all, print help for daemon commands as well 
+    If the cluster is available, get and print monitor help
     """
 
+    def help_for_sigs(sigs, partial=None):
+        sys.stdout.write(format_help(parse_json_funcsigs(sigs, 'cli'),
+                         partial=partial))
+
     def help_for_target(target, partial=None):
         ret, outbuf, outs = json_command(cluster_handle, target=target,
                                          prefix='get_command_descriptions', 
@@ -167,40 +178,19 @@ def do_help(parser, args, help_all = False):
                 "couldn't get command descriptions for {0}: {1}".\
                 format(target, outs)
         else:
-            sys.stdout.write(format_help(parse_json_funcsigs(outbuf, 'cli'),
-                             partial))
+            help_for_sigs(outbuf, partial)
 
-    parser.print_help()
-    print '\n'
-    if (cluster_handle):
-        help_for_target(target=('mon', ''), partial=' '.join(args))
 
-    if help_all and cluster_handle:
-        # try/except in case there are no daemons of that type
-        try:
-            firstosd = osdids()[0]
-            print '\nOSD.{0} tell commands and pg pgid commands:\n\n'.\
-                format(firstosd)
-            help_for_target(target=('osd', osdids()[0]))
-
-            print '\nOSD daemon commands:\n\n'
-            sys.stdout.write(format_help(parse_json_funcsigs(admin_socket(ceph_conf('admin_socket', 'osd.' + firstosd), ['get_command_descriptions']), 'cli')))
-        except:
-            pass
+    def hdr(s):
+        print '\n', s, '\n', '=' * len(s)
 
-        try:
-            firstmon = monids()[0]
-            print '\nmon.{0} daemon commands:\n\n'.format(firstmon)
-            sys.stdout.write(format_help(parse_json_funcsigs(admin_socket(ceph_conf('admin_socket', 'mon.' + firstmon), ['get_command_descriptions']), 'cli')))
-        except:
-            pass
+    hdr('Monitor commands:')
+    partial = ' '.join(args)
+    parser.print_help()
+    print '\n'
 
-        try:
-            firstmds = mdsids()[0]
-            print '\nmds.{0} daemon commands:\n\n'.format(firstmds)
-            sys.stdout.write(format_help(parse_json_funcsigs(admin_socket(ceph_conf('admin_socket', 'mds.' + firstmds), ['get_command_descriptions']), 'cli')))
-        except:
-            pass
+    if (cluster_handle):
+        help_for_target(target=('mon', ''), partial=partial)
 
     return 0
 
@@ -285,26 +275,57 @@ def format_help(cmddict, partial=None):
 
     return fullusage
 
-def admin_socket(asok_path, cmd):
-    sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
+def admin_socket(asok_path, cmd, format=''):
+    """
+    Send a daemon (--admin-daemon) command 'cmd'.  asok_path is the
+    path to the admin socket; cmd is a list of strings; format may be
+    set to one of the formatted forms to get output in that form
+    (daemon commands don't support 'plain' output).
+    """
+
+    def do_sockio(path, cmd):
+        """ helper: do all the actual low-level stream I/O """
+        sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
+        sock.connect(path)
+        try:
+            sock.sendall(cmd + '\0')
+            len_str = sock.recv(4)
+            if len(len_str) < 4:
+                raise RuntimeError("no data returned from admin socket")
+            l, = struct.unpack(">I", len_str)
+            ret = ''
+
+            got = 0
+            while got < l:
+                bit = sock.recv(l - got)
+                ret += bit
+                got += len(bit)
+
+        except Exception as e:
+            raise RuntimeError('exception: ' + str(e))
+        return ret
+
     try:
-        sock.connect(asok_path)
-        sock.sendall(' '.join(cmd) + '\0')
+        cmd_json = do_sockio(asok_path,
+            json.dumps({"prefix":"get_command_descriptions"}))
+    except Exception as e:
+        raise RuntimeError('exception getting command descriptions: ' + str(e))
 
-        len_str = sock.recv(4)
-        if len(len_str) < 4:
-            raise RuntimeError("no data returned from admin socket")
-        l, = struct.unpack(">I", len_str)
-        ret = ''
+    if cmd == 'get_command_descriptions':
+        return cmd_json
 
-        got = 0
-        while got < l:
-            bit = sock.recv(l - got)
-            ret += bit
-            got += len(bit)
+    sigdict = parse_json_funcsigs(cmd_json, 'cli')
+    valid_dict = validate_command(sigdict, cmd)
+    if not valid_dict:
+        return -errno.EINVAL
 
+    if format:
+        valid_dict['format'] = format
+
+    try:
+        ret = do_sockio(asok_path, json.dumps(valid_dict))
     except Exception as e:
-        raise RuntimeError('exception: {0}'.format(e))
+        raise RuntimeError('exception: ' + str(e))
 
     return ret
 
@@ -344,10 +365,11 @@ def new_style_command(parsed_args, cmdargs, target, sigdict, inbuf, verbose):
     if not got_command:
         if cmdargs:
             # Validate input args against list of sigs
-            valid_dict = validate_command(parsed_args, sigdict, cmdargs,
-                                          verbose)
+            valid_dict = validate_command(sigdict, cmdargs, verbose)
             if valid_dict:
                 got_command = True
+                if parsed_args.output_format:
+                    valid_dict['format'] = parsed_args.output_format
             else:
                 return -errno.EINVAL, '', 'invalid command'
         else:
@@ -360,8 +382,10 @@ def new_style_command(parsed_args, cmdargs, target, sigdict, inbuf, verbose):
                     return 0, '', ''
                 cmdargs = parse_cmdargs(interactive_input.split())[2]
                 target = find_cmd_target(cmdargs)
-                valid_dict = validate_command(parsed_args, sigdict, cmdargs)
+                valid_dict = validate_command(sigdict, cmdargs, verbose)
                 if valid_dict:
+                    if parsed_args.output_format:
+                        valid_dict['format'] = parsed_args.output_format
                     if verbose:
                         print >> sys.stderr, "Submitting command ", valid_dict
                     ret, outbuf, outs = json_command(cluster_handle,
@@ -378,45 +402,6 @@ def new_style_command(parsed_args, cmdargs, target, sigdict, inbuf, verbose):
     return json_command(cluster_handle, target=target, argdict=valid_dict,
                         inbuf=inbuf)
 
-OSD_TELL_MATCH = { \
-    'sig': ['tell', {'name':'target','type':'CephName'}], \
-    'matchnum': 2, \
-    'return_key': 'target', \
-}
-PGID_MATCH = { \
-    'sig': ['pg', {'name':'pgid','type':'CephPgid'}], \
-    'matchnum': 2, \
-    'return_key': 'pgid', \
-}
-
-def find_cmd_target(childargs):
-    """
-    Using a minimal validation, figure out whether the command
-    should be sent to a monitor or an osd.  We do this before even
-    asking for the 'real' set of command signatures, so we can ask the
-    right daemon.
-    Returns ('osd', osdid), ('pg', pgid), or ('mon', '')
-    """
-    sig = parse_funcsig(['tell', {'name':'target', 'type':'CephName'}])
-    try:
-        valid_dict = validate(childargs, sig, partial=True)
-        if len(valid_dict) == 2:
-            name = CephName()
-            name.valid(valid_dict['target'])
-            return name.nametype, name.nameid
-    except ArgumentError:
-        pass
-
-    sig = parse_funcsig(['pg', {'name':'pgid', 'type':'CephPgid'}])
-    try:
-        valid_dict = validate(childargs, sig, partial=True)
-        if len(valid_dict) == 2:
-            return 'pg', valid_dict['pgid']
-    except ArgumentError:
-        pass
-
-    return 'mon', ''
-
 def complete(sigdict, args, target):
     """
     Command completion.  Match as much of [args] as possible,
@@ -509,9 +494,12 @@ def main():
         conffile = parsed_args.cephconf
     # For now, --admin-daemon is handled as usual.  Try it
     # first in case we can't connect() to the cluster
+
+    format = parsed_args.output_format
+
     if parsed_args.admin_socket:
         try:
-            print admin_socket(parsed_args.admin_socket, childargs)
+            print admin_socket(parsed_args.admin_socket, childargs, format)
         except Exception as e:
             print >> sys.stderr, 'admin_socket: {0}'.format(e)
         return 0
@@ -520,7 +508,7 @@ def main():
         if len(childargs) > 2:
             if childargs[1].find('/') >= 0:
                 try:
-                    print admin_socket(childargs[1], childargs[2:])
+                    print admin_socket(childargs[1], childargs[2:], format)
                 except Exception as e:
                     print >> sys.stderr, 'admin_socket: {0}'.format(e)
                 return 0
@@ -528,7 +516,7 @@ def main():
                 # try resolve daemon name
                 path = ceph_conf('admin_socket', childargs[1])
                 try:
-                    print admin_socket(path, childargs[2:])
+                    print admin_socket(path, childargs[2:], format)
                 except Exception as e:
                     print >> sys.stderr, 'admin_socket: {0}'.format(e)
                 return 0
@@ -583,8 +571,8 @@ def main():
             format(e.__class__.__name__)
         return 1
 
-    if parsed_args.help or parsed_args.help_all:
-        return do_help(parser, childargs, parsed_args.help_all)
+    if parsed_args.help:
+        return do_help(parser, childargs)
 
     # implement -w/--watch_*
     # This is ugly, but Namespace() isn't quite rich enough.
diff --git a/src/ceph_authtool.cc b/src/ceph_authtool.cc
index 3075d9c69a7..f66a3c66eee 100644
--- a/src/ceph_authtool.cc
+++ b/src/ceph_authtool.cc
@@ -36,9 +36,15 @@ void usage()
        << "                                'mount -o secret=..' argument\n"
        << "  -C, --create-keyring          will create a new keyring, overwriting any\n"
        << "                                existing keyringfile\n"
-       << "  --gen-key                     will generate a new secret key for the\n"
+       << "  -g, --gen-key                 will generate a new secret key for the\n"
        << "                                specified entityname\n"
-       << "  --add-key                     will add an encoded key to the keyring\n"
+       << "  --gen-print-key               will generate a new secret key without set it\n"
+       << "                                to the keyringfile, prints the secret to stdout\n"
+       << "  --import-keyring              will import the content of a given keyring\n"
+       << "                                into the keyringfile\n"
+       << "  -u, --set-uid                 sets the auid (authenticated user id) for the\n"
+       << "                                specified entityname\n"
+       << "  -a, --add-key                 will add an encoded key to the keyring\n"
        << "  --cap subsystem capability    will set the capability for given subsystem\n"
        << "  --caps capsfile               will set all of capabilities associated with a\n"
        << "                                given key, for all subsystems"
diff --git a/src/client/Client.cc b/src/client/Client.cc
index eb7502c1530..5a9c5fdafcc 100644
--- a/src/client/Client.cc
+++ b/src/client/Client.cc
@@ -102,22 +102,24 @@ Client::CommandHook::CommandHook(Client *client) :
 {
 }
 
-bool Client::CommandHook::call(std::string command, std::string args, bufferlist& out)
+bool Client::CommandHook::call(std::string command, std::string args,
+			       std::string format, bufferlist& out)
 {
   stringstream ss;
-  JSONFormatter formatter(true);
+  Formatter *f = new_formatter(format);
   m_client->client_lock.Lock();
   if (command == "mds_requests")
-    m_client->dump_mds_requests(&formatter);
+    m_client->dump_mds_requests(f);
   else if (command == "mds_sessions")
-    m_client->dump_mds_sessions(&formatter);
+    m_client->dump_mds_sessions(f);
   else if (command == "dump_cache")
-    m_client->dump_cache(&formatter);
+    m_client->dump_cache(f);
   else
     assert(0 == "bad command registered");
   m_client->client_lock.Unlock();
-  formatter.flush(ss);
+  f->flush(ss);
   out.append(ss);
+  delete f;
   return true;
 }
 
@@ -1303,7 +1305,7 @@ int Client::make_request(MetaRequest *request,
       // wait
       if (session->state == MetaSession::STATE_OPENING) {
 	ldout(cct, 10) << "waiting for session to mds." << mds << " to open" << dendl;
-	wait_on_list(session->waiting_for_open);
+	wait_on_context_list(session->waiting_for_open);
 	continue;
       }
 
@@ -1522,7 +1524,7 @@ void Client::_closed_mds_session(MetaSession *s)
 {
   s->state = MetaSession::STATE_CLOSED;
   messenger->mark_down(s->con);
-  signal_cond_list(s->waiting_for_open);
+  signal_context_list(s->waiting_for_open);
   mount_cond.Signal();
   remove_session_caps(s);
   kick_requests(s, true);
@@ -1549,7 +1551,7 @@ void Client::handle_client_session(MClientSession *m)
     if (!unmounting) {
       connect_mds_targets(from);
     }
-    signal_cond_list(session->waiting_for_open);
+    signal_context_list(session->waiting_for_open);
     break;
 
   case CEPH_SESSION_CLOSE:
@@ -1900,7 +1902,7 @@ void Client::handle_mds_map(MMDSMap* m)
       if (oldstate < MDSMap::STATE_ACTIVE) {
 	kick_requests(p->second, false);
 	kick_flushing_caps(p->second);
-	signal_cond_list(p->second->waiting_for_open);
+	signal_context_list(p->second->waiting_for_open);
 	kick_maxsize_requests(p->second);
 	wake_inode_waiters(p->second);
       }
@@ -2607,6 +2609,24 @@ void Client::signal_cond_list(list<Cond*>& ls)
     (*it)->Signal();
 }
 
+void Client::wait_on_context_list(list<Context*>& ls)
+{
+  Cond cond;
+  bool done = false;
+  int r;
+  ls.push_back(new C_Cond(&cond, &done, &r));
+  while (!done)
+    cond.Wait(client_lock);
+}
+
+void Client::signal_context_list(list<Context*>& ls)
+{
+  while (!ls.empty()) {
+    ls.front()->complete(0);
+    ls.pop_front();
+  }
+}
+
 void Client::wake_inode_waiters(MetaSession *s)
 {
   xlist<Cap*>::iterator iter = s->caps.begin();
@@ -7915,7 +7935,7 @@ void Client::ms_handle_remote_reset(Connection *con)
 	case MetaSession::STATE_OPENING:
 	  {
 	    ldout(cct, 1) << "reset from mds we were opening; retrying" << dendl;
-	    list<Cond*> waiters;
+	    list<Context*> waiters;
 	    waiters.swap(s->waiting_for_open);
 	    _closed_mds_session(s);
 	    MetaSession *news = _get_or_open_mds_session(mds);
diff --git a/src/client/Client.h b/src/client/Client.h
index 96e8937f287..bc1fbc0401b 100644
--- a/src/client/Client.h
+++ b/src/client/Client.h
@@ -196,7 +196,8 @@ class Client : public Dispatcher {
     Client *m_client;
   public:
     CommandHook(Client *client);
-    bool call(std::string command, std::string args, bufferlist& out);
+    bool call(std::string command, std::string args, std::string format,
+	      bufferlist& out);
   };
   CommandHook m_command_hook;
 
@@ -344,6 +345,9 @@ protected:
   void wait_on_list(list<Cond*>& ls);
   void signal_cond_list(list<Cond*>& ls);
 
+  void wait_on_context_list(list<Context*>& ls);
+  void signal_context_list(list<Context*>& ls);
+
   // -- metadata cache stuff
 
   // decrease inode ref.  delete if dangling.
diff --git a/src/client/MetaSession.h b/src/client/MetaSession.h
index 1f6a1240617..01575efb6ba 100644
--- a/src/client/MetaSession.h
+++ b/src/client/MetaSession.h
@@ -35,7 +35,7 @@ struct MetaSession {
     STATE_CLOSED,
   } state;
 
-  list<Cond*> waiting_for_open;
+  list<Context*> waiting_for_open;
 
   xlist<Cap*> caps;
   xlist<Inode*> flushing_caps;
diff --git a/src/client/fuse_ll.cc b/src/client/fuse_ll.cc
index ce0c6de1260..0812c9a3728 100644
--- a/src/client/fuse_ll.cc
+++ b/src/client/fuse_ll.cc
@@ -509,6 +509,7 @@ static void fuse_ll_statfs(fuse_req_t req, fuse_ino_t ino)
     fuse_reply_err(req, -r);
 }
 
+#if 0
 static int getgroups_cb(void *handle, uid_t uid, gid_t **sgids)
 {
 #ifdef HAVE_FUSE_GETGROUPS
@@ -534,6 +535,7 @@ static int getgroups_cb(void *handle, uid_t uid, gid_t **sgids)
 #endif
   return 0;
 }
+#endif
 
 static void invalidate_cb(void *handle, vinodeno_t vino, int64_t off, int64_t len)
 {
@@ -702,8 +704,20 @@ int CephFuse::Handle::init(int argc, const char *argv[])
 
   fuse_session_add_chan(se, ch);
 
+  /*
+   * this is broken:
+   *
+   * - the cb needs the request handle to be useful; we should get the
+   *   gids in the method here in fuse_ll.c and pass the gid list in,
+   *   not use a callback.
+   * - the callback mallocs the list but it is not free()'d
+   *
+   * so disable it for now...
+
   client->ll_register_getgroups_cb(getgroups_cb, this);
 
+   */
+
   if (g_conf->fuse_use_invalidate_cb)
     client->ll_register_ino_invalidate_cb(invalidate_cb, this);
 
diff --git a/src/cls/rgw/cls_rgw.cc b/src/cls/rgw/cls_rgw.cc
index de2abe5665b..6cda4cba5c3 100644
--- a/src/cls/rgw/cls_rgw.cc
+++ b/src/cls/rgw/cls_rgw.cc
@@ -815,7 +815,7 @@ static int bi_log_iterate_entries(cls_method_context_t hctx, const string& marke
   map<string, bufferlist> keys;
   string filter_prefix, end_key;
   bufferlist start_bl;
-  bool start_key_added;
+  bool start_key_added = false;
   uint32_t i = 0;
   string key;
 
diff --git a/src/common/Formatter.cc b/src/common/Formatter.cc
index 357b287fe32..c08ea5b9a20 100644
--- a/src/common/Formatter.cc
+++ b/src/common/Formatter.cc
@@ -62,15 +62,19 @@ Formatter::~Formatter()
 }
 
 Formatter *
-new_formatter(const std::string &type)
+new_formatter(const std::string type)
 {
-    if (type == "json")
+    std::string mytype = type;
+    if (mytype == "")
+      mytype = "json-pretty";
+
+    if (mytype == "json")
       return new JSONFormatter(false);
-    else if (type == "json-pretty")
+    else if (mytype == "json-pretty")
       return new JSONFormatter(true);
-    else if (type == "xml")
+    else if (mytype == "xml")
       return new XMLFormatter(false);
-    else if (type == "xml-pretty")
+    else if (mytype == "xml-pretty")
       return new XMLFormatter(true);
     else
       return (Formatter *)NULL;
@@ -250,6 +254,18 @@ void JSONFormatter::dump_format(const char *name, const char *fmt, ...)
   print_quoted_string(buf);
 }
 
+void JSONFormatter::dump_format_unquoted(const char *name, const char *fmt, ...)
+{
+  char buf[LARGE_SIZE];
+  va_list ap;
+  va_start(ap, fmt);
+  vsnprintf(buf, LARGE_SIZE, fmt, ap);
+  va_end(ap);
+
+  print_name(name);
+  m_ss << buf;
+}
+
 int JSONFormatter::get_len() const
 {
   return m_ss.str().size();
@@ -400,6 +416,21 @@ void XMLFormatter::dump_format(const char *name, const char *fmt, ...)
     m_ss << "\n";
 }
 
+void XMLFormatter::dump_format_unquoted(const char *name, const char *fmt, ...)
+{
+  char buf[LARGE_SIZE];
+  va_list ap;
+  va_start(ap, fmt);
+  vsnprintf(buf, LARGE_SIZE, fmt, ap);
+  va_end(ap);
+
+  std::string e(name);
+  print_spaces();
+  m_ss << "<" << e << ">" << buf << "</" << e << ">";
+  if (m_pretty)
+    m_ss << "\n";
+}
+
 int XMLFormatter::get_len() const
 {
   return m_ss.str().size();
diff --git a/src/common/Formatter.h b/src/common/Formatter.h
index 8775c0cf9df..da730103f41 100644
--- a/src/common/Formatter.h
+++ b/src/common/Formatter.h
@@ -45,6 +45,7 @@ class Formatter {
   virtual void dump_string(const char *name, std::string s) = 0;
   virtual std::ostream& dump_stream(const char *name) = 0;
   virtual void dump_format(const char *name, const char *fmt, ...) = 0;
+  virtual void dump_format_unquoted(const char *name, const char *fmt, ...) = 0;
   virtual int get_len() const = 0;
   virtual void write_raw_data(const char *data) = 0;
 
@@ -60,7 +61,7 @@ class Formatter {
   }
 };
 
-Formatter *new_formatter(const std::string &type);
+Formatter *new_formatter(const std::string type);
 
 class JSONFormatter : public Formatter {
  public:
@@ -79,6 +80,7 @@ class JSONFormatter : public Formatter {
   void dump_string(const char *name, std::string s);
   std::ostream& dump_stream(const char *name);
   void dump_format(const char *name, const char *fmt, ...);
+  void dump_format_unquoted(const char *name, const char *fmt, ...);
   int get_len() const;
   void write_raw_data(const char *data);
 
@@ -119,6 +121,7 @@ class XMLFormatter : public Formatter {
   void dump_string(const char *name, std::string s);
   std::ostream& dump_stream(const char *name);
   void dump_format(const char *name, const char *fmt, ...);
+  void dump_format_unquoted(const char *name, const char *fmt, ...);
   int get_len() const;
   void write_raw_data(const char *data);
 
diff --git a/src/common/admin_socket.cc b/src/common/admin_socket.cc
index f7ab3501dff..e73f3ce0a0c 100644
--- a/src/common/admin_socket.cc
+++ b/src/common/admin_socket.cc
@@ -310,6 +310,18 @@ bool AdminSocket::do_accept()
 
   bool rval = false;
 
+  map<string, cmd_vartype> cmdmap;
+  string format;
+  vector<string> cmdvec;
+  stringstream errss;
+  cmdvec.push_back(cmd);
+  if (!cmdmap_from_json(cmdvec, &cmdmap, errss)) {
+    ldout(m_cct, 0) << "AdminSocket: " << errss << dendl;
+    return false;
+  }
+  cmd_getval(m_cct, cmdmap, "format", format);
+  cmd_getval(m_cct, cmdmap, "prefix", c);
+
   string firstword;
   if (c.find(" ") == string::npos)
     firstword = c;
@@ -341,7 +353,7 @@ bool AdminSocket::do_accept()
     string args;
     if (match != c)
       args = c.substr(match.length() + 1);
-    bool success = p->second->call(match, args, out);
+    bool success = p->second->call(match, args, format, out);
     if (!success) {
       ldout(m_cct, 0) << "AdminSocket: request '" << match << "' args '" << args
 		      << "' to " << p->second << " failed" << dendl;
@@ -378,8 +390,7 @@ int AdminSocket::register_command(std::string command, std::string cmddesc, Admi
     ldout(m_cct, 5) << "register_command " << command << " hook " << hook << dendl;
     m_hooks[command] = hook;
     m_descs[command] = cmddesc;
-    if (help.length())
-      m_help[command] = help;
+    m_help[command] = help;
     ret = 0;
   }  
   m_lock.Unlock();
@@ -406,7 +417,8 @@ int AdminSocket::unregister_command(std::string command)
 
 class VersionHook : public AdminSocketHook {
 public:
-  virtual bool call(std::string command, std::string args, bufferlist& out) {
+  virtual bool call(std::string command, std::string args, std::string format,
+		    bufferlist& out) {
     if (command == "0") {
       out.append(CEPH_ADMIN_SOCK_VERSION);
     } else {
@@ -429,18 +441,20 @@ class HelpHook : public AdminSocketHook {
   AdminSocket *m_as;
 public:
   HelpHook(AdminSocket *as) : m_as(as) {}
-  bool call(string command, string args, bufferlist& out) {
-    JSONFormatter jf(true);
-    jf.open_object_section("help");
+  bool call(string command, string args, string format, bufferlist& out) {
+    Formatter *f = new_formatter(format);
+    f->open_object_section("help");
     for (map<string,string>::iterator p = m_as->m_help.begin();
 	 p != m_as->m_help.end();
 	 ++p) {
-      jf.dump_string(p->first.c_str(), p->second);
+      if (p->second.length())
+	f->dump_string(p->first.c_str(), p->second);
     }
-    jf.close_section();
+    f->close_section();
     ostringstream ss;
-    jf.flush(ss);
+    f->flush(ss);
     out.append(ss.str());
+    delete f;
     return true;
   }
 };
@@ -449,7 +463,7 @@ class GetdescsHook : public AdminSocketHook {
   AdminSocket *m_as;
 public:
   GetdescsHook(AdminSocket *as) : m_as(as) {}
-  bool call(string command, string args, bufferlist& out) {
+  bool call(string command, string args, string format, bufferlist& out) {
     int cmdnum = 0;
     JSONFormatter jf(false);
     jf.open_object_section("command_descriptions");
diff --git a/src/common/admin_socket.h b/src/common/admin_socket.h
index c390bca0382..30c5eb96ab8 100644
--- a/src/common/admin_socket.h
+++ b/src/common/admin_socket.h
@@ -29,7 +29,8 @@ class CephContext;
 
 class AdminSocketHook {
 public:
-  virtual bool call(std::string command, std::string args, bufferlist& out) = 0;
+  virtual bool call(std::string command, std::string args, std::string format,
+		    bufferlist& out) = 0;
   virtual ~AdminSocketHook() {};
 };
 
diff --git a/src/common/ceph_context.cc b/src/common/ceph_context.cc
index cad980bb2a6..6b227d8689e 100644
--- a/src/common/ceph_context.cc
+++ b/src/common/ceph_context.cc
@@ -156,44 +156,46 @@ class CephContextHook : public AdminSocketHook {
 public:
   CephContextHook(CephContext *cct) : m_cct(cct) {}
 
-  bool call(std::string command, std::string args, bufferlist& out) {
-    m_cct->do_command(command, args, &out);
+  bool call(std::string command, std::string args, std::string format,
+	    bufferlist& out) {
+    m_cct->do_command(command, args, format, &out);
     return true;
   }
 };
 
-void CephContext::do_command(std::string command, std::string args, bufferlist *out)
+void CephContext::do_command(std::string command, std::string args,
+			     std::string format, bufferlist *out)
 {
+  Formatter *f = new_formatter(format);
   lgeneric_dout(this, 1) << "do_command '" << command << "' '" << args << "'" << dendl;
   if (command == "perfcounters_dump" || command == "1" ||
       command == "perf dump") {
-    _perf_counters_collection->write_json_to_buf(*out, false);
+    _perf_counters_collection->dump_formatted(f, false);
   }
   else if (command == "perfcounters_schema" || command == "2" ||
     command == "perf schema") {
-    _perf_counters_collection->write_json_to_buf(*out, true);
+    _perf_counters_collection->dump_formatted(f, true);
   }
   else {
-    JSONFormatter jf(true);
-    jf.open_object_section(command.c_str());
+    f->open_object_section(command.c_str());
     if (command == "config show") {
-      _conf->show_config(&jf);
+      _conf->show_config(f);
     }
     else if (command == "config set") {
       std::string var = args;
       size_t pos = var.find(' ');
       if (pos == string::npos) {
-        jf.dump_string("error", "syntax error: 'config set <var> <value>'");
+        f->dump_string("error", "syntax error: 'config set <var> <value>'");
       } else {
         std::string val = var.substr(pos+1);
         var.resize(pos);
         int r = _conf->set_val(var.c_str(), val.c_str());
         if (r < 0) {
-          jf.dump_stream("error") << "error setting '" << var << "' to '" << val << "': " << cpp_strerror(r);
+          f->dump_stream("error") << "error setting '" << var << "' to '" << val << "': " << cpp_strerror(r);
         } else {
           ostringstream ss;
           _conf->apply_changes(&ss);
-          jf.dump_string("success", ss.str());
+          f->dump_string("success", ss.str());
         }
       }
     } else if (command == "config get") {
@@ -202,9 +204,9 @@ void CephContext::do_command(std::string command, std::string args, bufferlist *
         char *tmp = buf;
         int r = _conf->get_val(args.c_str(), &tmp, sizeof(buf));
         if (r < 0) {
-            jf.dump_stream("error") << "error getting '" << args << "': " << cpp_strerror(r);
+            f->dump_stream("error") << "error getting '" << args << "': " << cpp_strerror(r);
         } else {
-            jf.dump_string(args.c_str(), buf);
+            f->dump_string(args.c_str(), buf);
         }
     } else if (command == "log flush") {
       _log->flush();
@@ -218,11 +220,10 @@ void CephContext::do_command(std::string command, std::string args, bufferlist *
     else {
       assert(0 == "registered under wrong command?");    
     }
-    ostringstream ss;
-    jf.close_section();
-    jf.flush(ss);
-    out->append(ss.str());
+    f->close_section();
   }
+  f->flush(*out);
+  delete f;
   lgeneric_dout(this, 1) << "do_command '" << command << "' '" << args << "' result is " << out->length() << " bytes" << dendl;
 };
 
diff --git a/src/common/ceph_context.h b/src/common/ceph_context.h
index 1678680fa6d..85618e35219 100644
--- a/src/common/ceph_context.h
+++ b/src/common/ceph_context.h
@@ -97,7 +97,8 @@ public:
   /**
    * process an admin socket command
    */
-  void do_command(std::string command, std::string args, bufferlist *out);
+  void do_command(std::string command, std::string args, std::string foramt,
+		  bufferlist *out);
 
   /**
    * get a crypto handler
diff --git a/src/common/config_opts.h b/src/common/config_opts.h
index b43808e211c..3b9d025393f 100644
--- a/src/common/config_opts.h
+++ b/src/common/config_opts.h
@@ -443,6 +443,7 @@ OPTION(osd_deep_scrub_stride, OPT_INT, 524288)
 OPTION(osd_scan_list_ping_tp_interval, OPT_U64, 100)
 OPTION(osd_auto_weight, OPT_BOOL, false)
 OPTION(osd_class_dir, OPT_STR, CEPH_LIBDIR "/rados-classes") // where rados plugins are stored
+OPTION(osd_open_classes_on_start, OPT_BOOL, true)
 OPTION(osd_check_for_log_corruption, OPT_BOOL, false)
 OPTION(osd_use_stale_snap, OPT_BOOL, false)
 OPTION(osd_rollback_to_cluster_snap, OPT_STR, "")
@@ -501,20 +502,20 @@ OPTION(osd_max_attr_size, OPT_U64, 65536)
 OPTION(filestore, OPT_BOOL, false)
 
 /// filestore wb throttle limits
-OPTION(filestore_wbthrottle_btrfs_bytes_start_flusher, OPT_U64, 10<<20)
-OPTION(filestore_wbthrottle_btrfs_bytes_hard_limit, OPT_U64, 100<<20)
-OPTION(filestore_wbthrottle_btrfs_ios_start_flusher, OPT_U64, 100)
-OPTION(filestore_wbthrottle_btrfs_ios_hard_limit, OPT_U64, 1000)
-OPTION(filestore_wbthrottle_btrfs_inodes_start_flusher, OPT_U64, 100)
-OPTION(filestore_wbthrottle_xfs_bytes_start_flusher, OPT_U64, 10<<20)
-OPTION(filestore_wbthrottle_xfs_bytes_hard_limit, OPT_U64, 100<<20)
-OPTION(filestore_wbthrottle_xfs_ios_start_flusher, OPT_U64, 10)
-OPTION(filestore_wbthrottle_xfs_ios_hard_limit, OPT_U64, 100)
-OPTION(filestore_wbthrottle_xfs_inodes_start_flusher, OPT_U64, 10)
+OPTION(filestore_wbthrottle_btrfs_bytes_start_flusher, OPT_U64, 41943040)
+OPTION(filestore_wbthrottle_btrfs_bytes_hard_limit, OPT_U64, 419430400)
+OPTION(filestore_wbthrottle_btrfs_ios_start_flusher, OPT_U64, 500)
+OPTION(filestore_wbthrottle_btrfs_ios_hard_limit, OPT_U64, 5000)
+OPTION(filestore_wbthrottle_btrfs_inodes_start_flusher, OPT_U64, 500)
+OPTION(filestore_wbthrottle_xfs_bytes_start_flusher, OPT_U64, 41943040)
+OPTION(filestore_wbthrottle_xfs_bytes_hard_limit, OPT_U64, 419430400)
+OPTION(filestore_wbthrottle_xfs_ios_start_flusher, OPT_U64, 500)
+OPTION(filestore_wbthrottle_xfs_ios_hard_limit, OPT_U64, 5000)
+OPTION(filestore_wbthrottle_xfs_inodes_start_flusher, OPT_U64, 500)
 
 /// These must be less than the fd limit
-OPTION(filestore_wbthrottle_btrfs_inodes_hard_limit, OPT_U64, 256)
-OPTION(filestore_wbthrottle_xfs_inodes_hard_limit, OPT_U64, 100)
+OPTION(filestore_wbthrottle_btrfs_inodes_hard_limit, OPT_U64, 5000)
+OPTION(filestore_wbthrottle_xfs_inodes_hard_limit, OPT_U64, 5000)
 
 // Tests index failure paths
 OPTION(filestore_index_retry_probability, OPT_DOUBLE, 0)
diff --git a/src/common/fiemap.cc b/src/common/fiemap.cc
deleted file mode 100644
index a1d5fbe9396..00000000000
--- a/src/common/fiemap.cc
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Copyright (C) 2010 Canonical
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- * 
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- * 
- */
-
-/*
- *  Author Colin Ian King,  colin.king@canonical.com
- */
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <unistd.h>
-#include <fcntl.h>
-
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/ioctl.h>
-
-#if defined(__linux__)
-#include <linux/fs.h>
-#endif
-#include "include/inttypes.h"
-#include "include/fiemap.h"
-
-struct fiemap *read_fiemap(int fd)
-{
-  struct fiemap *fiemap;
-  struct fiemap *_realloc_fiemap = NULL;
-  int extents_size;
-  int r;
-
-  if ((fiemap = (struct fiemap*)malloc(sizeof(struct fiemap))) == NULL) {
-    fprintf(stderr, "Out of memory allocating fiemap\n");
-    return NULL;
-  }
-  memset(fiemap, 0, sizeof(struct fiemap));
-
-  fiemap->fm_start = 0;
-  fiemap->fm_length = ~0;		/* Lazy */
-  fiemap->fm_flags = 0;
-  fiemap->fm_extent_count = 0;
-  fiemap->fm_mapped_extents = 0;
-
-  /* Find out how many extents there are */
-  r = ioctl(fd, FS_IOC_FIEMAP, fiemap);
-  if (r < 0) {
-    goto done_err;
-  }
-
-  if (!fiemap->fm_mapped_extents) {
-    goto done_err;
-  }
-
-  /* Read in the extents */
-  extents_size = sizeof(struct fiemap_extent) * (fiemap->fm_mapped_extents);
-
-  /* Resize fiemap to allow us to read in the extents */
-
-  if ((_realloc_fiemap = (struct fiemap*)realloc(fiemap,sizeof(struct fiemap) +
-                                        extents_size)) == NULL) {
-    fprintf(stderr, "Out of memory allocating fiemap\n");
-    goto done_err;
-  } else {
-    fiemap = _realloc_fiemap;
-  }
-
-  memset(fiemap->fm_extents, 0, extents_size);
-  fiemap->fm_extent_count = fiemap->fm_mapped_extents;
-  fiemap->fm_mapped_extents = 0;
-
-  if (ioctl(fd, FS_IOC_FIEMAP, fiemap) < 0) {
-    fprintf(stderr, "fiemap ioctl() failed\n");
-    goto done_err;
-  }
-
-  return fiemap;
-done_err:
-  free(fiemap);
-  return NULL;
-}
-
diff --git a/src/common/perf_counters.cc b/src/common/perf_counters.cc
index 67a777497b3..1dd4cdabd9d 100644
--- a/src/common/perf_counters.cc
+++ b/src/common/perf_counters.cc
@@ -15,6 +15,7 @@
 #include "common/perf_counters.h"
 #include "common/dout.h"
 #include "common/errno.h"
+#include "common/Formatter.h"
 
 #include <errno.h>
 #include <inttypes.h>
@@ -72,21 +73,20 @@ void PerfCountersCollection::clear()
   }
 }
 
-void PerfCountersCollection::write_json_to_buf(bufferlist& bl, bool schema)
+void PerfCountersCollection::dump_formatted(Formatter *f, bool schema)
 {
   Mutex::Locker lck(m_lock);
-  bl.append('{');
+  f->open_object_section("perfcounter_collection");
   perf_counters_set_t::iterator l = m_loggers.begin();
   perf_counters_set_t::iterator l_end = m_loggers.end();
   if (l != l_end) {
     while (true) {
-      (*l)->write_json_to_buf(bl, schema);
+      (*l)->dump_formatted(f, schema);
       if (++l == l_end)
 	break;
-      bl.append(',');
     }
   }
-  bl.append('}');
+  f->close_section();
 }
 
 // ---------------------------
@@ -203,34 +203,54 @@ utime_t PerfCounters::tget(int idx) const
   return utime_t(data.u64 / 1000000000ull, data.u64 % 1000000000ull);
 }
 
-void PerfCounters::write_json_to_buf(bufferlist& bl, bool schema)
+void PerfCounters::dump_formatted(Formatter *f, bool schema)
 {
-  char buf[512];
   Mutex::Locker lck(m_lock);
 
-  snprintf(buf, sizeof(buf), "\"%s\":{", m_name.c_str());
-  bl.append(buf);
-
+  f->open_object_section(m_name.c_str());
   perf_counter_data_vec_t::const_iterator d = m_data.begin();
   perf_counter_data_vec_t::const_iterator d_end = m_data.end();
   if (d == d_end) {
-    bl.append('}');
+    f->close_section();
     return;
   }
   while (true) {
-    const perf_counter_data_any_d &data(*d);
-    buf[0] = '\0';
-    if (schema)
-      data.write_schema_json(buf, sizeof(buf));
-    else
-      data.write_json(buf, sizeof(buf));
-
-    bl.append(buf);
+    if (schema) {
+      f->open_object_section(d->name);
+      f->dump_int("type", d->type);
+      f->close_section();
+    } else {
+      if (d->type & PERFCOUNTER_LONGRUNAVG) {
+	f->open_object_section(d->name);
+	if (d->type & PERFCOUNTER_U64) {
+	  f->dump_unsigned("avgcount", d->avgcount);
+	  f->dump_unsigned("sum", d->u64);
+	} else if (d->type & PERFCOUNTER_TIME) {
+	  f->dump_unsigned("avgcount", d->avgcount);
+	  f->dump_format_unquoted("sum", "%"PRId64".%09"PRId64,
+				  d->u64 / 1000000000ull,
+				  d->u64 % 1000000000ull);
+	} else {
+	  assert(0);
+	}
+	f->close_section();
+      } else {
+	if (d->type & PERFCOUNTER_U64) {
+	  f->dump_unsigned(d->name, d->u64);
+	} else if (d->type & PERFCOUNTER_TIME) {
+	  f->dump_format_unquoted(d->name, "%"PRId64".%09"PRId64,
+				  d->u64 / 1000000000ull,
+				  d->u64 % 1000000000ull);
+	} else {
+	  assert(0);
+	}
+      }
+    }
+
     if (++d == d_end)
       break;
-    bl.append(',');
   }
-  bl.append('}');
+  f->close_section();
 }
 
 const std::string &PerfCounters::get_name() const
@@ -258,42 +278,6 @@ PerfCounters::perf_counter_data_any_d::perf_counter_data_any_d()
 {
 }
 
-void  PerfCounters::perf_counter_data_any_d::write_schema_json(char *buf, size_t buf_sz) const
-{
-  snprintf(buf, buf_sz, "\"%s\":{\"type\":%d}", name, type);
-}
-
-void  PerfCounters::perf_counter_data_any_d::write_json(char *buf, size_t buf_sz) const
-{
-  if (type & PERFCOUNTER_LONGRUNAVG) {
-    if (type & PERFCOUNTER_U64) {
-      snprintf(buf, buf_sz, "\"%s\":{\"avgcount\":%" PRId64 ","
-	      "\"sum\":%" PRId64 "}", 
-	      name, avgcount, u64);
-    }
-    else if (type & PERFCOUNTER_TIME) {
-      snprintf(buf, buf_sz, "\"%s\":{\"avgcount\":%" PRId64 ","
-	      "\"sum\":%llu.%09llu}",
-	       name, avgcount, u64 / 1000000000ull, u64 % 1000000000ull);
-    }
-    else {
-      assert(0);
-    }
-  }
-  else {
-    if (type & PERFCOUNTER_U64) {
-      snprintf(buf, buf_sz, "\"%s\":%" PRId64,
-	       name, u64);
-    }
-    else if (type & PERFCOUNTER_TIME) {
-      snprintf(buf, buf_sz, "\"%s\":%llu.%09llu", name, u64 / 1000000000ull, u64 % 1000000000ull);
-    }
-    else {
-      assert(0);
-    }
-  }
-}
-
 PerfCountersBuilder::PerfCountersBuilder(CephContext *cct, const std::string &name,
                   int first, int last)
   : m_perf_counters(new PerfCounters(cct, name, first, last))
diff --git a/src/common/perf_counters.h b/src/common/perf_counters.h
index 269a32f2c46..ec10f9a9282 100644
--- a/src/common/perf_counters.h
+++ b/src/common/perf_counters.h
@@ -76,7 +76,7 @@ public:
   void tinc(int idx, utime_t v);
   utime_t tget(int idx) const;
 
-  void write_json_to_buf(ceph::bufferlist& bl, bool schema);
+  void dump_formatted(ceph::Formatter *f, bool schema);
 
   const std::string& get_name() const;
   void set_name(std::string s) {
@@ -136,7 +136,7 @@ public:
   void add(class PerfCounters *l);
   void remove(class PerfCounters *l);
   void clear();
-  void write_json_to_buf(ceph::bufferlist& bl, bool schema);
+  void dump_formatted(ceph::Formatter *f, bool schema);
 private:
   CephContext *m_cct;
 
diff --git a/src/common/sharedptr_registry.hpp b/src/common/sharedptr_registry.hpp
index 8669d063a79..a62aa0d9ce3 100644
--- a/src/common/sharedptr_registry.hpp
+++ b/src/common/sharedptr_registry.hpp
@@ -29,6 +29,7 @@ class SharedPtrRegistry {
 public:
   typedef std::tr1::shared_ptr<V> VPtr;
   typedef std::tr1::weak_ptr<V> WeakVPtr;
+  int waiting;
 private:
   Mutex lock;
   Cond cond;
@@ -52,7 +53,10 @@ private:
   friend class OnRemoval;
 
 public:
-  SharedPtrRegistry() : lock("SharedPtrRegistry::lock") {}
+  SharedPtrRegistry() :
+    waiting(0),
+    lock("SharedPtrRegistry::lock")
+  {}
 
   bool get_next(const K &key, pair<K, V> *next) {
     VPtr next_val;
@@ -70,26 +74,33 @@ public:
 
   VPtr lookup(const K &key) {
     Mutex::Locker l(lock);
+    waiting++;
     while (1) {
       if (contents.count(key)) {
 	VPtr retval = contents[key].lock();
-	if (retval)
+	if (retval) {
+	  waiting--;
 	  return retval;
+	}
       } else {
 	break;
       }
       cond.Wait(lock);
     }
+    waiting--;
     return VPtr();
   }
 
   VPtr lookup_or_create(const K &key) {
     Mutex::Locker l(lock);
+    waiting++;
     while (1) {
       if (contents.count(key)) {
 	VPtr retval = contents[key].lock();
-	if (retval)
+	if (retval) {
+	  waiting--;
 	  return retval;
+	}
       } else {
 	break;
       }
@@ -97,6 +108,7 @@ public:
     }
     VPtr retval(new V(), OnRemoval(this, key));
     contents[key] = retval;
+    waiting--;
     return retval;
   }
 
@@ -109,11 +121,14 @@ public:
   template<class A>
   VPtr lookup_or_create(const K &key, const A &arg) {
     Mutex::Locker l(lock);
+    waiting++;
     while (1) {
       if (contents.count(key)) {
 	VPtr retval = contents[key].lock();
-	if (retval)
+	if (retval) {
+	  waiting--;
 	  return retval;
+	}
       } else {
 	break;
       }
@@ -121,8 +136,11 @@ public:
     }
     VPtr retval(new V(arg), OnRemoval(this, key));
     contents[key] = retval;
+    waiting--;
     return retval;
   }
+
+  friend class SharedPtrRegistryTest;
 };
 
 #endif
diff --git a/src/global/signal_handler.cc b/src/global/signal_handler.cc
index 25f1a0a1992..ce604fe1e5d 100644
--- a/src/global/signal_handler.cc
+++ b/src/global/signal_handler.cc
@@ -19,6 +19,7 @@
 #include "global/pidfile.h"
 #include "global/signal_handler.h"
 
+#include <poll.h>
 #include <signal.h>
 #include <sstream>
 #include <stdlib.h>
@@ -189,25 +190,27 @@ struct SignalHandler : public Thread {
   // thread entry point
   void *entry() {
     while (!stop) {
-      // create fd set
-      fd_set rfds;
-      FD_ZERO(&rfds);
-      FD_SET(pipefd[0], &rfds);
-      int max_fd = pipefd[0];
+      // build fd list
+      struct pollfd fds[33];
 
       lock.Lock();
+      int num_fds = 0;
+      fds[num_fds].fd = pipefd[0];
+      fds[num_fds].events = POLLIN | POLLOUT | POLLERR;
+      fds[num_fds].revents = 0;
+      ++num_fds;
       for (unsigned i=0; i<32; i++) {
 	if (handlers[i]) {
-	  int fd = handlers[i]->pipefd[0];
-	  FD_SET(fd, &rfds);
-	  if (fd > max_fd)
-	    max_fd = fd;
+	  fds[num_fds].fd = handlers[i]->pipefd[0];
+	  fds[num_fds].events = POLLIN | POLLOUT | POLLERR;
+	  fds[num_fds].revents = 0;
+	  ++num_fds;
 	}
       }
       lock.Unlock();
 
       // wait for data on any of those pipes
-      int r = select(max_fd + 1, &rfds, NULL, NULL, NULL);
+      int r = poll(fds, num_fds, -1);
       if (stop)
 	break;
       if (r > 0) {
diff --git a/src/include/fiemap.h b/src/include/fiemap.h
deleted file mode 100644
index 846adb155ff..00000000000
--- a/src/include/fiemap.h
+++ /dev/null
@@ -1,27 +0,0 @@
-#ifndef __CEPH_FIEMAP_H
-#define __CEPH_FIEMAP_H
-
-#include "acconfig.h"
-
-/*
- * the header is missing on most systems.  for the time being at
- * least, include our own copy in the repo.
- */
-#ifdef HAVE_FIEMAP_H
-# include <linux/fiemap.h>
-#else
-# include "linux_fiemap.h"
-#endif
-
-#if defined(__linux__)
-#include <linux/ioctl.h>
-#elif defined(__FreeBSD__)
-#include <sys/ioctl.h>
-#endif
-#ifndef FS_IOC_FIEMAP
-# define FS_IOC_FIEMAP                        _IOWR('f', 11, struct fiemap)
-#endif
-
-extern "C" struct fiemap *read_fiemap(int fd);
-
-#endif
diff --git a/src/init-ceph.in b/src/init-ceph.in
index b0ed353f8e7..7d003e6370c 100644
--- a/src/init-ceph.in
+++ b/src/init-ceph.in
@@ -27,7 +27,7 @@ else
 fi
 
 usage_exit() {
-    echo "usage: $0 [options] {start|stop|restart} [mon|osd|mds]..."
+    echo "usage: $0 [options] {start|stop|restart|condrestart} [mon|osd|mds]..."
     printf "\t-c ceph.conf\n"
     printf "\t--valgrind\trun via valgrind\n"
     printf "\t--hostname [hostname]\toverride hostname lookup\n"
@@ -241,7 +241,7 @@ for name in $what; do
     case "$command" in
 	start)
             # Increase max_open_files, if the configuration calls for it.
-            get_conf max_open_files "8192" "max open files"
+            get_conf max_open_files "32768" "max open files"
 
             # build final command
 	    wrap=""
@@ -414,6 +414,15 @@ for name in $what; do
 	    $0 $options start $name
 	    ;;
 
+        condrestart)
+            if daemon_is_running $name ceph-$type $id $pid_file; then
+                $0 $options stop $name
+                $0 $options start $name
+            else
+                echo "$name: not running."
+            fi
+            ;;
+
 	cleanlogs)
 	    echo removing logs
 	    [ -n "$log_dir" ] && do_cmd "rm -f $log_dir/$type.$id.*"
diff --git a/src/librados/RadosClient.cc b/src/librados/RadosClient.cc
index f68125fb8c0..e8dd019af3a 100644
--- a/src/librados/RadosClient.cc
+++ b/src/librados/RadosClient.cc
@@ -642,6 +642,10 @@ int librados::RadosClient::osd_command(int osd, vector<string>& cmd,
   bool done;
   int ret;
   tid_t tid;
+
+  if (osd < 0)
+    return -EINVAL;
+
   lock.Lock();
   // XXX do anything with tid?
   int r = objecter->osd_command(osd, cmd, inbl, &tid, poutbl, prs,
diff --git a/src/messages/MMonScrub.h b/src/messages/MMonScrub.h
index ab4588f4a76..b16728bcdd5 100644
--- a/src/messages/MMonScrub.h
+++ b/src/messages/MMonScrub.h
@@ -31,7 +31,7 @@ public:
     switch (op) {
     case OP_SCRUB: return "scrub";
     case OP_RESULT: return "result";
-    default: assert("unknown op type"); return NULL;
+    default: assert(0 == "unknown op type"); return NULL;
     }
   }
 
diff --git a/src/messages/MMonSync.h b/src/messages/MMonSync.h
index a5415a8f451..48229d15bcc 100644
--- a/src/messages/MMonSync.h
+++ b/src/messages/MMonSync.h
@@ -49,7 +49,7 @@ public:
     case OP_CHUNK: return "chunk";
     case OP_LAST_CHUNK: return "last_chunk";
     case OP_NO_COOKIE: return "no_cookie";
-    default: assert("unknown op type"); return NULL;
+    default: assert(0 == "unknown op type"); return NULL;
     }
   }
 
diff --git a/src/mon/DataHealthService.cc b/src/mon/DataHealthService.cc
index a55e8c392e2..6e8aa313a36 100644
--- a/src/mon/DataHealthService.cc
+++ b/src/mon/DataHealthService.cc
@@ -93,7 +93,7 @@ health_status_t DataHealthService::get_health(
     }
 
     if (f) {
-      f->open_object_section(mon_name.c_str());
+      f->open_object_section("mon");
       f->dump_string("name", mon_name.c_str());
       f->dump_int("kb_total", stats.kb_total);
       f->dump_int("kb_used", stats.kb_used);
diff --git a/src/mon/MDSMonitor.cc b/src/mon/MDSMonitor.cc
index 172f0f1e3db..f0fb4ae8332 100644
--- a/src/mon/MDSMonitor.cc
+++ b/src/mon/MDSMonitor.cc
@@ -563,7 +563,9 @@ bool MDSMonitor::preprocess_command(MMonCommand *m)
 
   if (prefix == "mds stat") {
     if (f) {
+      f->open_object_section("mds_stat");
       dump_info(f.get());
+      f->close_section();
       f->flush(ds);
     } else {
       ds << mdsmap;
diff --git a/src/mon/MonCap.cc b/src/mon/MonCap.cc
index 8e35b775247..b03873ad7dd 100644
--- a/src/mon/MonCap.cc
+++ b/src/mon/MonCap.cc
@@ -133,6 +133,7 @@ void MonCapGrant::expand_profile(entity_name_t name) const
   if (profile == "mds") {
     profile_grants.push_back(MonCapGrant("mds", MON_CAP_ALL));
     profile_grants.push_back(MonCapGrant("mon", MON_CAP_R));
+    profile_grants.push_back(MonCapGrant("osd", MON_CAP_R));
     profile_grants.push_back(MonCapGrant("log", MON_CAP_W));
   }
   if (profile == "osd" || profile == "mds" || profile == "mon") {
@@ -149,7 +150,7 @@ void MonCapGrant::expand_profile(entity_name_t name) const
     profile_grants.push_back(MonCapGrant("osd create"));
     profile_grants.push_back(MonCapGrant("osd crush set"));  // FIXME: constraint this further?
     profile_grants.push_back(MonCapGrant("auth add"));
-    profile_grants.back().command_args["name"] = StringConstraint("", "osd.");
+    profile_grants.back().command_args["entity"] = StringConstraint("", "osd.");
     profile_grants.back().command_args["caps_mon"] = StringConstraint("allow profile osd", "");
     profile_grants.back().command_args["caps_osd"] = StringConstraint("allow *", "");
   }
@@ -158,7 +159,7 @@ void MonCapGrant::expand_profile(entity_name_t name) const
     profile_grants.push_back(MonCapGrant("osd", MON_CAP_R));  // read osdmap
     profile_grants.push_back(MonCapGrant("mon getmap"));
     profile_grants.push_back(MonCapGrant("auth get-or-create"));  // FIXME: this can expose other mds keys
-    profile_grants.back().command_args["name"] = StringConstraint("", "mds.");
+    profile_grants.back().command_args["entity"] = StringConstraint("", "mds.");
     profile_grants.back().command_args["caps_mon"] = StringConstraint("allow profile mds", "");
     profile_grants.back().command_args["caps_osd"] = StringConstraint("allow rwx", "");
     profile_grants.back().command_args["caps_mds"] = StringConstraint("allow", "");
diff --git a/src/mon/MonCommands.h b/src/mon/MonCommands.h
index 5b950ca1aef..0980893bf9b 100644
--- a/src/mon/MonCommands.h
+++ b/src/mon/MonCommands.h
@@ -482,7 +482,7 @@ COMMAND("osd pool delete " \
 	"name=pool2,type=CephPoolname " \
 	"name=sure,type=CephChoices,strings=--yes-i-really-really-mean-it", \
 	"delete pool (say pool twice, add --yes-i-really-really-mean-it)", \
-	"osd", "r", "cli,rest")
+	"osd", "rw", "cli,rest")
 COMMAND("osd pool rename " \
 	"name=srcpool,type=CephPoolname " \
 	"name=destpool,type=CephPoolname", \
@@ -494,8 +494,7 @@ COMMAND("osd pool get " \
 COMMAND("osd pool set " \
 	"name=pool,type=CephPoolname " \
 	"name=var,type=CephChoices,strings=size|min_size|crash_replay_interval|pg_num|pgp_num|crush_ruleset " \
-	"name=val,type=CephInt " \
-	"name=sure,type=CephChoices,strings=--allow-experimental-feature,req=false", \
+	"name=val,type=CephInt", \
 	"set pool parameter <var> to <val>", "osd", "rw", "cli,rest")
 // 'val' is a CephString because it can include a unit.  Perhaps
 // there should be a Python type for validation/conversion of strings
diff --git a/src/mon/Monitor.cc b/src/mon/Monitor.cc
index f537c915945..119ef740aa8 100644
--- a/src/mon/Monitor.cc
+++ b/src/mon/Monitor.cc
@@ -143,6 +143,7 @@ Monitor::Monitor(CephContext* cct_, string nm, MonitorDBStore *s,
   elector(this),
   leader(0),
   quorum_features(0),
+  scrub_version(0),
 
   // sync state
   sync_provider_count(0),
@@ -224,21 +225,20 @@ class AdminHook : public AdminSocketHook {
   Monitor *mon;
 public:
   AdminHook(Monitor *m) : mon(m) {}
-  bool call(std::string command, std::string args, bufferlist& out) {
+  bool call(std::string command, std::string args, std::string format,
+	    bufferlist& out) {
     stringstream ss;
-    mon->do_admin_command(command, args, ss);
+    mon->do_admin_command(command, args, format, ss);
     out.append(ss);
     return true;
   }
 };
 
-void Monitor::do_admin_command(string command, string args, ostream& ss)
+void Monitor::do_admin_command(string command, string args, string format,
+			       ostream& ss)
 {
   Mutex::Locker l(lock);
 
-  map<string, cmd_vartype> cmdmap;
-  string format;
-  cmd_getval(g_ceph_context, cmdmap, "format", format, string("plain"));
   boost::scoped_ptr<Formatter> f(new_formatter(format));
 
   if (command == "mon_status")
@@ -1534,11 +1534,23 @@ bool Monitor::_allowed_command(MonSession *s, map<string, cmd_vartype>& cmd)
   map<string,string> strmap;
   for (map<string, cmd_vartype>::const_iterator p = cmd.begin();
        p != cmd.end(); ++p) {
-    if (p->first != "prefix") {
-      strmap[p->first] = cmd_vartype_stringify(p->second);
+    if (p->first == "prefix")
+      continue;
+    if (p->first == "caps") {
+      vector<string> cv;
+      if (cmd_getval(g_ceph_context, cmd, "caps", cv) &&
+	  cv.size() % 2 == 0) {
+	for (unsigned i = 0; i < cv.size(); i += 2) {
+	  string k = string("caps_") + cv[i];
+	  strmap[k] = cv[i + 1];
+	}
+	continue;
+      }
     }
+    strmap[p->first] = cmd_vartype_stringify(p->second);
   }
 
+  dout(20) << __func__ << " strmap " << strmap << dendl;
   if (s->caps.is_capable(g_ceph_context, s->inst.name,
 			 "", prefix, strmap, false, false, true)) {
     retval = true; 
@@ -1589,6 +1601,14 @@ void Monitor::_quorum_status(Formatter *f, ostream& ss)
     f->dump_int("mon", *p);
   f->close_section(); // quorum
 
+  set<string> quorum_names = get_quorum_names();
+  f->open_array_section("quorum_names");
+  for (set<string>::iterator p = quorum_names.begin(); p != quorum_names.end(); ++p)
+    f->dump_string("mon", *p);
+  f->close_section(); // quorum_names
+
+  f->dump_string("quorum_leader_name", quorum.empty() ? string() : monmap->get_name(*quorum.begin()));
+
   f->open_object_section("monmap");
   monmap->dump(f);
   f->close_section(); // monmap
@@ -1832,6 +1852,7 @@ void Monitor::get_status(stringstream &ss, Formatter *f)
     f->open_object_section("mdsmap");
     mdsmon()->mdsmap.print_summary(f, NULL);
     f->close_section();
+    f->close_section();
   } else {
     ss << "  cluster " << monmap->get_fsid() << "\n";
     ss << "   health " << health << "\n";
diff --git a/src/mon/Monitor.h b/src/mon/Monitor.h
index 82b08816702..bed48ecee34 100644
--- a/src/mon/Monitor.h
+++ b/src/mon/Monitor.h
@@ -745,7 +745,8 @@ public:
   int write_fsid();
   int write_fsid(MonitorDBStore::Transaction &t);
 
-  void do_admin_command(std::string command, std::string args, ostream& ss);
+  void do_admin_command(std::string command, std::string args,
+			std::string format, ostream& ss);
 
 private:
   // don't allow copying
diff --git a/src/mon/MonmapMonitor.cc b/src/mon/MonmapMonitor.cc
index 195f66350d8..5ec1583b82f 100644
--- a/src/mon/MonmapMonitor.cc
+++ b/src/mon/MonmapMonitor.cc
@@ -322,8 +322,8 @@ bool MonmapMonitor::prepare_command(MMonCommand *m)
     string name;
     cmd_getval(g_ceph_context, cmdmap, "name", name);
     if (!pending_map.contains(name)) {
-      err = -ENOENT;
-      ss << "mon " << name << " does not exist";
+      err = 0;
+      ss << "mon " << name << " does not exist or has already been removed";
       goto out;
     }
 
diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc
index 20e4eac88cb..c6db052a591 100644
--- a/src/mon/OSDMonitor.cc
+++ b/src/mon/OSDMonitor.cc
@@ -125,6 +125,51 @@ void OSDMonitor::update_from_paxos(bool *need_bootstrap)
   version_t latest_full = get_version_latest_full();
   if (latest_full == 0 && get_first_committed() > 1)
     latest_full = get_first_committed();
+
+  if (latest_full > 0) {
+    // make sure we can really believe get_version_latest_full(); see
+    // 76cd7ac1c2094b34ad36bea89b2246fa90eb2f6d
+    bufferlist test;
+    get_version_full(latest_full, test);
+    if (test.length() == 0) {
+      dout(10) << __func__ << " ignoring recorded latest_full as it is missing; fallback to search" << dendl;
+      latest_full = 0;
+    }
+  }
+  if (get_first_committed() > 1 &&
+      latest_full < get_first_committed()) {
+    /* a bug introduced in 7fb3804fb860dcd0340dd3f7c39eec4315f8e4b6 would lead
+     * us to not update the on-disk latest_full key.  Upon trim, the actual
+     * version would cease to exist but we would still point to it.  This
+     * makes sure we get it pointing to a proper version.
+     */
+    version_t lc = get_last_committed();
+    version_t fc = get_first_committed();
+
+    dout(10) << __func__ << " looking for valid full map in interval"
+             << " [" << fc << ", " << lc << "]" << dendl;
+
+    latest_full = 0;
+    for (version_t v = lc; v >= fc; v--) {
+      string full_key = "full_" + stringify(v);
+      if (mon->store->exists(get_service_name(), full_key)) {
+        dout(10) << __func__ << " found latest full map v " << v << dendl;
+        latest_full = v;
+        break;
+      }
+    }
+
+    // if we trigger this, then there's something else going with the store
+    // state, and we shouldn't want to work around it without knowing what
+    // exactly happened.
+    assert(latest_full > 0);
+    MonitorDBStore::Transaction t;
+    put_version_latest_full(&t, latest_full);
+    mon->store->apply_transaction(t);
+    dout(10) << __func__ << " updated the on-disk full map version to "
+             << latest_full << dendl;
+  }
+
   if ((latest_full > 0) && (latest_full > osdmap.epoch)) {
     bufferlist latest_bl;
     get_version_full(latest_full, latest_bl);
@@ -526,17 +571,6 @@ void OSDMonitor::encode_pending(MonitorDBStore::Transaction *t)
   put_last_committed(t, pending_inc.epoch);
 }
 
-void OSDMonitor::encode_full(MonitorDBStore::Transaction *t)
-{
-  dout(10) << __func__ << " osdmap e " << osdmap.epoch << dendl;
-  assert(get_last_committed() == osdmap.epoch);
- 
-  bufferlist osdmap_bl;
-  osdmap.encode(osdmap_bl);
-  put_version_full(t, osdmap.epoch, osdmap_bl);
-  put_version_latest_full(t, osdmap.epoch);
-}
-
 void OSDMonitor::share_map_with_random_osd()
 {
   if (osdmap.get_num_up_osds() == 0) {
@@ -586,6 +620,7 @@ void OSDMonitor::encode_trim_extra(MonitorDBStore::Transaction *tx, version_t fi
   bufferlist bl;
   get_version_full(first, bl);
   put_version_full(tx, first, bl);
+  put_version_latest_full(tx, first);
 }
 
 // -------------
@@ -1930,6 +1965,8 @@ bool OSDMonitor::preprocess_command(MMonCommand *m)
 
   if (prefix == "osd stat") {
     osdmap.print_summary(f.get(), ds);
+    if (f)
+      f->flush(ds);
     rdata.append(ds);
   }
   else if (prefix == "osd dump" ||
diff --git a/src/mon/OSDMonitor.h b/src/mon/OSDMonitor.h
index d7cb8fdf369..f66c676b53d 100644
--- a/src/mon/OSDMonitor.h
+++ b/src/mon/OSDMonitor.h
@@ -150,11 +150,15 @@ private:
   void update_from_paxos(bool *need_bootstrap);
   void create_pending();  // prepare a new pending
   void encode_pending(MonitorDBStore::Transaction *t);
-  virtual void encode_full(MonitorDBStore::Transaction *t);
   void on_active();
   void on_shutdown();
 
   /**
+   * we haven't delegated full version stashing to paxosservice for some time
+   * now, making this function useless in current context.
+   */
+  virtual void encode_full(MonitorDBStore::Transaction *t) { }
+  /**
    * do not let paxosservice periodically stash full osdmaps, or we will break our
    * locally-managed full maps.  (update_from_paxos loads the latest and writes them
    * out going forward from there, but if we just synced that may mean we skip some.)
diff --git a/src/mon/PGMap.cc b/src/mon/PGMap.cc
index 8b37376ba7a..86ad87bd929 100644
--- a/src/mon/PGMap.cc
+++ b/src/mon/PGMap.cc
@@ -788,12 +788,16 @@ void PGMap::print_summary(Formatter *f, ostream *out) const
 {
   std::stringstream ss;
   if (f)
-    f->open_object_section("pgs_by_state");
+    f->open_array_section("pgs_by_state");
+
   for (hash_map<int,int>::const_iterator p = num_pg_by_state.begin();
        p != num_pg_by_state.end();
        ++p) {
     if (f) {
-      f->dump_unsigned(pg_state_string(p->first).c_str(), p->second);
+      f->open_object_section("pgs_by_state_element");
+      f->dump_string("state_name", pg_state_string(p->first));
+      f->dump_unsigned("count", p->second);
+      f->close_section();
     } else {
       if (p != num_pg_by_state.begin())
 	ss << ", ";
diff --git a/src/mon/PGMonitor.cc b/src/mon/PGMonitor.cc
index 648a8fe2384..d86cbe70c19 100644
--- a/src/mon/PGMonitor.cc
+++ b/src/mon/PGMonitor.cc
@@ -255,6 +255,12 @@ void PGMonitor::update_from_paxos(bool *need_bootstrap)
   update_logger();
 }
 
+void PGMonitor::on_upgrade()
+{
+  dout(1) << __func__ << " discarding in-core PGMap" << dendl;
+  pg_map = PGMap();
+}
+
 void PGMonitor::upgrade_format()
 {
   unsigned current = 1;
diff --git a/src/mon/PGMonitor.h b/src/mon/PGMonitor.h
index e8e1b4210aa..44015395e94 100644
--- a/src/mon/PGMonitor.h
+++ b/src/mon/PGMonitor.h
@@ -60,6 +60,7 @@ private:
   void create_initial();
   void update_from_paxos(bool *need_bootstrap);
   void upgrade_format();
+  void on_upgrade();
   void post_paxos_update();
   void handle_osd_timeouts();
   void create_pending();  // prepare a new pending
diff --git a/src/mon/Paxos.cc b/src/mon/Paxos.cc
index 508669deef5..a543abed7ed 100644
--- a/src/mon/Paxos.cc
+++ b/src/mon/Paxos.cc
@@ -210,7 +210,7 @@ void Paxos::handle_collect(MMonPaxos *collect)
   // do we have an accepted but uncommitted value?
   //  (it'll be at last_committed+1)
   bufferlist bl;
-  if (collect->last_committed == last_committed &&
+  if (collect->last_committed <= last_committed &&
       get_store()->exists(get_name(), last_committed+1)) {
     get_store()->get(get_name(), last_committed+1, bl);
     assert(bl.length() > 0);
diff --git a/src/mon/PaxosService.cc b/src/mon/PaxosService.cc
index d6e67a1c4b4..1b21689863b 100644
--- a/src/mon/PaxosService.cc
+++ b/src/mon/PaxosService.cc
@@ -114,7 +114,13 @@ void PaxosService::refresh(bool *need_bootstrap)
   // update cached versions
   cached_first_committed = mon->store->get(get_service_name(), first_committed_name);
   cached_last_committed = mon->store->get(get_service_name(), last_committed_name);
-  format_version = get_value("format_version");
+
+  version_t new_format = get_value("format_version");
+  if (new_format != format_version) {
+    dout(1) << __func__ << " upgraded, format " << format_version << " -> " << new_format << dendl;
+    on_upgrade();
+  }
+  format_version = new_format;
 
   dout(10) << __func__ << dendl;
 
diff --git a/src/mon/PaxosService.h b/src/mon/PaxosService.h
index 74d5a90494c..5321bebcace 100644
--- a/src/mon/PaxosService.h
+++ b/src/mon/PaxosService.h
@@ -459,6 +459,11 @@ public:
   virtual void upgrade_format() { }
 
   /**
+   * this is called when we detect the store has just upgraded underneath us
+   */
+  virtual void on_upgrade() {}
+
+  /**
    * Called when the Paxos system enters a Leader election.
    *
    * @remarks It's a courtesy method, in case the class implementing this
diff --git a/src/os/FileStore.cc b/src/os/FileStore.cc
index 17105c11d69..108a857ab9f 100644
--- a/src/os/FileStore.cc
+++ b/src/os/FileStore.cc
@@ -36,7 +36,7 @@
 #endif
 
 #include "include/compat.h"
-#include "include/fiemap.h"
+#include "include/linux_fiemap.h"
 
 #include "common/xattr.h"
 #include "chain_xattr.h"
@@ -2243,6 +2243,7 @@ int FileStore::_check_global_replay_guard(coll_t cid,
   if (r < 0) {
     dout(20) << __func__ << " no xattr" << dendl;
     assert(!m_filestore_fail_eio || r != -EIO);
+    TEMP_FAILURE_RETRY(::close(fd));
     return 1;  // no xattr
   }
   bufferlist bl;
@@ -4304,9 +4305,6 @@ int FileStore::_collection_rename(const coll_t &cid, const coll_t &ncid,
   get_cdir(cid, old_coll, sizeof(old_coll));
   get_cdir(ncid, new_coll, sizeof(new_coll));
 
-  _set_global_replay_guard(cid, spos);
-  _set_replay_guard(cid, spos);
-
   if (_check_replay_guard(cid, spos) < 0) {
     return 0;
   }
@@ -4315,6 +4313,16 @@ int FileStore::_collection_rename(const coll_t &cid, const coll_t &ncid,
     return _collection_remove_recursive(cid, spos);
   }
 
+  if (!collection_exists(cid)) {
+    if (replaying) {
+      // already happened
+      return 0;
+    } else {
+      return -ENOENT;
+    }
+  }
+  _set_global_replay_guard(cid, spos);
+
   int ret = 0;
   if (::rename(old_coll, new_coll)) {
     if (replaying && !btrfs_stable_commits &&
diff --git a/src/os/HashIndex.cc b/src/os/HashIndex.cc
index 86a912bbef2..c279bab3a60 100644
--- a/src/os/HashIndex.cc
+++ b/src/os/HashIndex.cc
@@ -447,18 +447,7 @@ int HashIndex::complete_merge(const vector<string> &path, subdir_info_s info) {
     r = move_objects(path, dst);
     if (r < 0)
       return r;
-    
-    map<string,hobject_t> objects_dst;
-    r = list_objects(dst, 0, 0, &objects_dst);
-    if (r < 0)
-      return r;
-    set<string> subdirs;
-    r = list_subdirs(dst, &subdirs);
-    if (r < 0)
-      return r;
-    dstinfo.objs = objects_dst.size();
-    dstinfo.subdirs = subdirs.size() - 1;
-    r = set_info(dst, dstinfo);
+    r = reset_attr(dst);
     if (r < 0)
       return r;
     r = remove_path(path);
@@ -576,7 +565,7 @@ int HashIndex::complete_split(const vector<string> &path, subdir_info_s info) {
   if (r < 0)
     return r;
   info.objs = objects.size();
-  r = set_info(path, info);
+  r = reset_attr(path);
   if (r < 0)
     return r;
   r = fsync_dir(path);
diff --git a/src/os/LFNIndex.cc b/src/os/LFNIndex.cc
index edf361a44f0..09d0f02267f 100644
--- a/src/os/LFNIndex.cc
+++ b/src/os/LFNIndex.cc
@@ -75,16 +75,19 @@ int LFNIndex::init()
 
 int LFNIndex::created(const hobject_t &hoid, const char *path)
 {
+  WRAP_RETRY(
   vector<string> path_comp;
   string short_name;
-  int r;
   r = decompose_full_path(path, &path_comp, 0, &short_name);
   if (r < 0)
-    return r;
+    goto out;
   r = lfn_created(path_comp, hoid, short_name);
   if (r < 0)
-    return r;
-  return _created(path_comp, hoid, short_name);
+    goto out;
+  r = _created(path_comp, hoid, short_name);
+  if (r < 0)
+    goto out;
+    );
 }
 
 int LFNIndex::unlink(const hobject_t &hoid)
diff --git a/src/os/ObjectStore.cc b/src/os/ObjectStore.cc
index ae97b6b08d3..9d8b989225b 100644
--- a/src/os/ObjectStore.cc
+++ b/src/os/ObjectStore.cc
@@ -435,6 +435,7 @@ void ObjectStore::Transaction::dump(ceph::Formatter *f)
 	f->dump_string("first", first);
 	f->dump_string("last", last);
       }
+      break;
 
     default:
       f->dump_string("op_name", "unknown");
diff --git a/src/osd/ClassHandler.cc b/src/osd/ClassHandler.cc
index 5af2ac01a0f..a9a920ba078 100644
--- a/src/osd/ClassHandler.cc
+++ b/src/osd/ClassHandler.cc
@@ -18,6 +18,11 @@
 #undef dout_prefix
 #define dout_prefix *_dout
 
+
+#define CLS_PREFIX "libcls_"
+#define CLS_SUFFIX ".so"
+
+
 int ClassHandler::open_class(const string& cname, ClassData **pcls)
 {
   Mutex::Locker lock(mutex);
@@ -31,11 +36,43 @@ int ClassHandler::open_class(const string& cname, ClassData **pcls)
   return 0;
 }
 
+int ClassHandler::open_all_classes()
+{
+  dout(10) << __func__ << dendl;
+  DIR *dir = ::opendir(g_conf->osd_class_dir.c_str());
+  if (!dir)
+    return -errno;
+
+  char buf[offsetof(struct dirent, d_name) + PATH_MAX + 1];
+  struct dirent *pde;
+  int r = 0;
+  while ((r = ::readdir_r(dir, (dirent *)&buf, &pde)) == 0 && pde) {
+    if (pde->d_name[0] == '.')
+      continue;
+    if (strlen(pde->d_name) > sizeof(CLS_PREFIX) - 1 + sizeof(CLS_SUFFIX) - 1 &&
+	strncmp(pde->d_name, CLS_PREFIX, sizeof(CLS_PREFIX) - 1) == 0 &&
+	strcmp(pde->d_name + strlen(pde->d_name) - (sizeof(CLS_SUFFIX) - 1), CLS_SUFFIX) == 0) {
+      char cname[PATH_MAX + 1];
+      strcpy(cname, pde->d_name + sizeof(CLS_PREFIX) - 1);
+      cname[strlen(cname) - (sizeof(CLS_SUFFIX) - 1)] = '\0';
+      dout(10) << __func__ << " found " << cname << dendl;
+      ClassData *cls;
+      r = open_class(cname, &cls);
+      if (r < 0)
+	goto out;
+    }
+  }
+ out:
+  closedir(dir);
+  return r;
+}
+
 void ClassHandler::shutdown()
 {
   for (map<string, ClassData>::iterator p = classes.begin(); p != classes.end(); ++p) {
     dlclose(p->second.handle);
   }
+  classes.clear();
 }
 
 ClassHandler::ClassData *ClassHandler::_get_class(const string& cname)
@@ -63,7 +100,7 @@ int ClassHandler::_load_class(ClassData *cls)
   if (cls->status == ClassData::CLASS_UNKNOWN ||
       cls->status == ClassData::CLASS_MISSING) {
     char fname[PATH_MAX];
-    snprintf(fname, sizeof(fname), "%s/libcls_%s.so",
+    snprintf(fname, sizeof(fname), "%s/" CLS_PREFIX "%s" CLS_SUFFIX,
 	     g_conf->osd_class_dir.c_str(),
 	     cls->name.c_str());
     dout(10) << "_load_class " << cls->name << " from " << fname << dendl;
diff --git a/src/osd/ClassHandler.h b/src/osd/ClassHandler.h
index 733ed01a35d..f7c80f9454b 100644
--- a/src/osd/ClassHandler.h
+++ b/src/osd/ClassHandler.h
@@ -78,6 +78,8 @@ private:
 public:
   ClassHandler() : mutex("ClassHandler") {}
   
+  int open_all_classes();
+
   int open_class(const string& cname, ClassData **pcls);
   
   ClassData *register_class(const char *cname);
diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc
index 3f226cec95d..e3a7c227e15 100644
--- a/src/osd/OSD.cc
+++ b/src/osd/OSD.cc
@@ -291,11 +291,13 @@ void OSDService::init_splits_between(pg_t pgid,
     // Ok, a split happened, so we need to walk the osdmaps
     set<pg_t> new_pgs; // pgs to scan on each map
     new_pgs.insert(pgid);
+    OSDMapRef curmap(get_map(frommap->get_epoch()));
     for (epoch_t e = frommap->get_epoch() + 1;
 	 e <= tomap->get_epoch();
 	 ++e) {
-      OSDMapRef curmap(get_map(e-1));
-      OSDMapRef nextmap(get_map(e));
+      OSDMapRef nextmap(try_get_map(e));
+      if (!nextmap)
+	continue;
       set<pg_t> even_newer_pgs; // pgs added in this loop
       for (set<pg_t>::iterator i = new_pgs.begin(); i != new_pgs.end(); ++i) {
 	set<pg_t> split_pgs;
@@ -307,7 +309,9 @@ void OSDService::init_splits_between(pg_t pgid,
 	}
       }
       new_pgs.insert(even_newer_pgs.begin(), even_newer_pgs.end());
+      curmap = nextmap;
     }
+    assert(curmap == tomap); // we must have had both frommap and tomap
   }
 }
 
@@ -993,44 +997,44 @@ class OSDSocketHook : public AdminSocketHook {
   OSD *osd;
 public:
   OSDSocketHook(OSD *o) : osd(o) {}
-  bool call(std::string command, std::string args, bufferlist& out) {
+  bool call(std::string command, std::string args, std::string format,
+	    bufferlist& out) {
     stringstream ss;
-    bool r = osd->asok_command(command, args, ss);
+    bool r = osd->asok_command(command, args, format, ss);
     out.append(ss);
     return r;
   }
 };
 
-bool OSD::asok_command(string command, string args, ostream& ss)
+bool OSD::asok_command(string command, string args, string format, ostream& ss)
 {
+  if (format == "")
+    format = "json-pretty";
+  Formatter *f = new_formatter(format);
   if (command == "dump_ops_in_flight") {
-    op_tracker.dump_ops_in_flight(ss);
+    op_tracker.dump_ops_in_flight(f);
   } else if (command == "dump_historic_ops") {
-    op_tracker.dump_historic_ops(ss);
+    op_tracker.dump_historic_ops(f);
   } else if (command == "dump_op_pq_state") {
-    JSONFormatter f(true);
-    f.open_object_section("pq");
-    op_wq.dump(&f);
-    f.close_section();
-    f.flush(ss);
+    f->open_object_section("pq");
+    op_wq.dump(f);
+    f->close_section();
   } else if (command == "dump_blacklist") {
     list<pair<entity_addr_t,utime_t> > bl;
     OSDMapRef curmap = service.get_osdmap();
 
-    JSONFormatter f(true);
-    f.open_array_section("blacklist");
+    f->open_array_section("blacklist");
     curmap->get_blacklist(&bl);
     for (list<pair<entity_addr_t,utime_t> >::iterator it = bl.begin();
 	it != bl.end(); ++it) {
-      f.open_array_section("entry");
-      f.open_object_section("entity_addr_t");
-      it->first.dump(&f);
-      f.close_section(); //entity_addr_t
-      it->second.localtime(f.dump_stream("expire_time"));
-      f.close_section(); //entry
-    }
-    f.close_section(); //blacklist
-    f.flush(ss);
+      f->open_array_section("entry");
+      f->open_object_section("entity_addr_t");
+      it->first.dump(f);
+      f->close_section(); //entity_addr_t
+      it->second.localtime(f->dump_stream("expire_time"));
+      f->close_section(); //entry
+    }
+    f->close_section(); //blacklist
   } else if (command == "dump_watchers") {
     list<obj_watch_item_t> watchers;
     osd_lock.Lock();
@@ -1048,35 +1052,35 @@ bool OSD::asok_command(string command, string args, ostream& ss)
     }
     osd_lock.Unlock();
 
-    JSONFormatter f(true);
-    f.open_array_section("watchers");
+    f->open_array_section("watchers");
     for (list<obj_watch_item_t>::iterator it = watchers.begin();
 	it != watchers.end(); ++it) {
 
-      f.open_array_section("watch");
+      f->open_array_section("watch");
 
-      f.dump_string("namespace", it->obj.nspace);
-      f.dump_string("object", it->obj.oid.name);
+      f->dump_string("namespace", it->obj.nspace);
+      f->dump_string("object", it->obj.oid.name);
 
-      f.open_object_section("entity_name");
-      it->wi.name.dump(&f);
-      f.close_section(); //entity_name_t
+      f->open_object_section("entity_name");
+      it->wi.name.dump(f);
+      f->close_section(); //entity_name_t
 
-      f.dump_int("cookie", it->wi.cookie);
-      f.dump_int("timeout", it->wi.timeout_seconds);
+      f->dump_int("cookie", it->wi.cookie);
+      f->dump_int("timeout", it->wi.timeout_seconds);
 
-      f.open_object_section("entity_addr_t");
-      it->wi.addr.dump(&f);
-      f.close_section(); //entity_addr_t
+      f->open_object_section("entity_addr_t");
+      it->wi.addr.dump(f);
+      f->close_section(); //entity_addr_t
 
-      f.close_section(); //watch
+      f->close_section(); //watch
     }
 
-    f.close_section(); //watches
-    f.flush(ss);
+    f->close_section(); //watches
   } else {
     assert(0 == "broken asok registration");
   }
+  f->flush(ss);
+  delete f;
   return true;
 }
 
@@ -1085,7 +1089,8 @@ class TestOpsSocketHook : public AdminSocketHook {
   ObjectStore *store;
 public:
   TestOpsSocketHook(OSDService *s, ObjectStore *st) : service(s), store(st) {}
-  bool call(std::string command, std::string args, bufferlist& out) {
+  bool call(std::string command, std::string args, std::string format,
+	    bufferlist& out) {
     stringstream ss;
     test_ops(service, store, command, args, ss);
     out.append(ss);
@@ -1162,6 +1167,12 @@ int OSD::init()
   class_handler = new ClassHandler();
   cls_initialize(class_handler);
 
+  if (g_conf->osd_open_classes_on_start) {
+    int r = class_handler->open_all_classes();
+    if (r)
+      dout(1) << "warning: got an error loading one or more classes: " << cpp_strerror(r) << dendl;
+  }
+
   // load up "current" osdmap
   assert_warn(!osdmap);
   if (osdmap) {
@@ -3808,51 +3819,90 @@ void OSD::handle_command(MCommand *m)
 struct OSDCommand {
   string cmdstring;
   string helpstring;
+  string module;
+  string perm;
+  string availability;
 } osd_commands[] = {
 
-#define COMMAND(parsesig, helptext) \
-  {parsesig, helptext},
+#define COMMAND(parsesig, helptext, module, perm, availability) \
+  {parsesig, helptext, module, perm, availability},
 
 // yes, these are really pg commands, but there's a limit to how
-// much work it's worth.  The OSD returns all of them.
+// much work it's worth.  The OSD returns all of them.  Make this
+// form (pg <pgid> <cmd>) valid only for the cli. 
+// Rest uses "tell <pgid> <cmd>"
 
 COMMAND("pg " \
 	"name=pgid,type=CephPgid " \
 	"name=cmd,type=CephChoices,strings=query", \
-	"show details of a specific pg")
+	"show details of a specific pg", "osd", "r", "cli")
 COMMAND("pg " \
 	"name=pgid,type=CephPgid " \
 	"name=cmd,type=CephChoices,strings=mark_unfound_lost " \
 	"name=mulcmd,type=CephChoices,strings=revert", \
-	"mark all unfound objects in this pg as lost, either removing or reverting to a prior version if one is available")
+	"mark all unfound objects in this pg as lost, either removing or reverting to a prior version if one is available",
+	"osd", "rw", "cli")
 COMMAND("pg " \
 	"name=pgid,type=CephPgid " \
 	"name=cmd,type=CephChoices,strings=list_missing " \
 	"name=offset,type=CephString,req=false",
-	"list missing objects on this pg, perhaps starting at an offset given in JSON")
+	"list missing objects on this pg, perhaps starting at an offset given in JSON",
+	"osd", "r", "cli")
 
-COMMAND("version", "report version of OSD")
+// new form: tell <pgid> <cmd> for both cli and rest 
+
+COMMAND("query",
+	"show details of a specific pg", "osd", "r", "cli,rest")
+COMMAND("mark_unfound_lost " \
+	"name=mulcmd,type=CephChoices,strings=revert", \
+	"mark all unfound objects in this pg as lost, either removing or reverting to a prior version if one is available",
+	"osd", "rw", "cli,rest")
+COMMAND("list_missing " \
+	"name=offset,type=CephString,req=false",
+	"list missing objects on this pg, perhaps starting at an offset given in JSON",
+	"osd", "r", "cli,rest")
+
+// tell <osd.n> commands.  Validation of osd.n must be special-cased in client
+
+// tell <osd.n> commands.  Validation of osd.n must be special-cased in client
+COMMAND("version", "report version of OSD", "osd", "r", "cli,rest")
 COMMAND("injectargs " \
 	"name=injected_args,type=CephString,n=N",
-	"inject configuration arguments into running OSD")
+	"inject configuration arguments into running OSD",
+	"osd", "rw", "cli,rest")
 COMMAND("bench " \
 	"name=count,type=CephInt,req=false " \
 	"name=size,type=CephInt,req=false ", \
 	"OSD benchmark: write <count> <size>-byte objects, " \
-	"(default 1G size 4MB). Results in log.")
-COMMAND("flush_pg_stats", "flush pg stats")
-COMMAND("debug dump_missing " \
+	"(default 1G size 4MB). Results in log.",
+	"osd", "rw", "cli,rest")
+COMMAND("flush_pg_stats", "flush pg stats", "osd", "rw", "cli,rest")
+COMMAND("debug_dump_missing " \
 	"name=filename,type=CephFilepath",
-	"dump missing objects to a named file")
+	"dump missing objects to a named file", "osd", "r", "cli,rest")
 COMMAND("debug kick_recovery_wq " \
 	"name=delay,type=CephInt,range=0",
-	"set osd_recovery_delay_start to <val>")
+	"set osd_recovery_delay_start to <val>", "osd", "rw", "cli,rest")
 COMMAND("cpu_profiler " \
 	"name=arg,type=CephChoices,strings=status|flush",
-	"run cpu profiling on daemon")
-COMMAND("dump_pg_recovery_stats", "dump pg recovery statistics")
-COMMAND("reset_pg_recovery_stats", "reset pg recovery statistics")
-
+	"run cpu profiling on daemon", "osd", "rw", "cli,rest")
+COMMAND("dump_pg_recovery_stats", "dump pg recovery statistics",
+	"osd", "r", "cli,rest")
+COMMAND("reset_pg_recovery_stats", "reset pg recovery statistics",
+	"osd", "rw", "cli,rest")
+
+// experiment: restate pg commands as "tell <pgid>".  Validation of
+// pgid must be special-cased in client.
+COMMAND("query",
+	"show details of a specific pg", "osd", "r", "cli,rest")
+COMMAND("mark_unfound_lost revert " \
+	"name=mulcmd,type=CephChoices,strings=revert", \
+	"mark all unfound objects in this pg as lost, either removing or reverting to a prior version if one is available",
+	"osd", "rw", "cli,rest")
+COMMAND("list_missing " \
+	"name=offset,type=CephString,req=false",
+	"list missing objects on this pg, perhaps starting at an offset given in JSON",
+	"osd", "rw", "cli,rest")
 };
 
 void OSD::do_command(Connection *con, tid_t tid, vector<string>& cmd, bufferlist& data)
@@ -3866,6 +3916,9 @@ void OSD::do_command(Connection *con, tid_t tid, vector<string>& cmd, bufferlist
 
   map<string, cmd_vartype> cmdmap;
   string prefix;
+  string format;
+  string pgidstr;
+  boost::scoped_ptr<Formatter> f;
 
   if (cmd.empty()) {
     ss << "no command given";
@@ -3888,8 +3941,8 @@ void OSD::do_command(Connection *con, tid_t tid, vector<string>& cmd, bufferlist
 
       ostringstream secname;
       secname << "cmd" << setfill('0') << std::setw(3) << cmdnum;
-      dump_cmd_and_help_to_json(f, secname.str(),
-				cp->cmdstring, cp->helpstring);
+      dump_cmddesc_to_json(f, secname.str(), cp->cmdstring, cp->helpstring,
+			   cp->module, cp->perm, cp->availability);
       cmdnum++;
     }
     f->close_section();	// command_descriptions
@@ -3900,8 +3953,18 @@ void OSD::do_command(Connection *con, tid_t tid, vector<string>& cmd, bufferlist
     goto out;
   }
 
+  cmd_getval(g_ceph_context, cmdmap, "format", format);
+  f.reset(new_formatter(format));
+
   if (prefix == "version") {
-    ds << pretty_version_to_str();
+    if (f) {
+      f->open_object_section("version");
+      f->dump_string("version", pretty_version_to_str());
+      f->close_section();
+      f->flush(ds);
+    } else {
+      ds << pretty_version_to_str();
+    }
     goto out;
   }
   else if (prefix == "injectargs") {
@@ -3921,9 +3984,16 @@ void OSD::do_command(Connection *con, tid_t tid, vector<string>& cmd, bufferlist
     osd_lock.Lock();
   }
 
-  else if (prefix == "pg") {
+  // either 'pg <pgid> <command>' or
+  // 'tell <pgid>' (which comes in without any of that prefix)?
+
+  else if (prefix == "pg" ||
+	   (cmd_getval(g_ceph_context, cmdmap, "pgid", pgidstr) &&
+	     (prefix == "query" ||
+	      prefix == "mark_unfound_lost" ||
+	      prefix == "list_missing")
+	   )) {
     pg_t pgid;
-    string pgidstr;
 
     if (!cmd_getval(g_ceph_context, cmdmap, "pgid", pgidstr)) {
       ss << "no pgid specified";
@@ -3932,14 +4002,15 @@ void OSD::do_command(Connection *con, tid_t tid, vector<string>& cmd, bufferlist
       ss << "couldn't parse pgid '" << pgidstr << "'";
       r = -EINVAL;
     } else {
-      vector<string> args;
-      cmd_getval(g_ceph_context, cmdmap, "args", args);
       PG *pg = _lookup_lock_pg(pgid);
       if (!pg) {
 	ss << "i don't have pgid " << pgid;
 	r = -ENOENT;
       } else {
-	r = pg->do_command(cmd, ss, data, odata);
+	// simulate pg <pgid> cmd= for pg->do-command
+	if (prefix != "pg")
+	  cmd_putval(g_ceph_context, cmdmap, "cmd", prefix);
+	r = pg->do_command(cmdmap, ss, data, odata);
 	pg->unlock();
       }
     }
@@ -3978,9 +4049,18 @@ void OSD::do_command(Connection *con, tid_t tid, vector<string>& cmd, bufferlist
     store->queue_transaction(NULL, cleanupt);
 
     uint64_t rate = (double)count / (end - start);
-    ss << "bench: wrote " << prettybyte_t(count)
-       << " in blocks of " << prettybyte_t(bsize) << " in "
-       << (end-start) << " sec at " << prettybyte_t(rate) << "/sec";
+    if (f) {
+      f->open_object_section("osd_bench_results");
+      f->dump_int("bytes_written", count);
+      f->dump_int("blocksize", bsize);
+      f->dump_float("bytes_per_sec", rate);
+      f->close_section();
+      f->flush(ss);
+    } else {
+      ss << "bench: wrote " << prettybyte_t(count)
+	 << " in blocks of " << prettybyte_t(bsize) << " in "
+	 << (end-start) << " sec at " << prettybyte_t(rate) << "/sec";
+    }
   }
 
   else if (prefix == "flush_pg_stats") {
@@ -4077,8 +4157,13 @@ void OSD::do_command(Connection *con, tid_t tid, vector<string>& cmd, bufferlist
 
   else if (prefix == "dump_pg_recovery_stats") {
     stringstream s;
-    pg_recovery_stats.dump(s);
-    ds << "dump pg recovery stats: " << s.str();
+    if (f) {
+      pg_recovery_stats.dump_formatted(f.get());
+      f->flush(ds);
+    } else {
+      pg_recovery_stats.dump(s);
+      ds << "dump pg recovery stats: " << s.str();
+    }
   }
 
   else if (prefix == "reset_pg_recovery_stats") {
@@ -5177,7 +5262,9 @@ void OSD::advance_pg(
   for (;
        next_epoch <= osd_epoch;
        ++next_epoch) {
-    OSDMapRef nextmap = get_map(next_epoch);
+    OSDMapRef nextmap = service.try_get_map(next_epoch);
+    if (!nextmap)
+      continue;
 
     vector<int> newup, newacting;
     nextmap->pg_to_up_acting_osds(pg->info.pgid, newup, newacting);
@@ -5511,7 +5598,7 @@ OSDMapRef OSDService::_add_map(OSDMap *o)
   return l;
 }
 
-OSDMapRef OSDService::get_map(epoch_t epoch)
+OSDMapRef OSDService::try_get_map(epoch_t epoch)
 {
   Mutex::Locker l(map_cache_lock);
   OSDMapRef retval = map_cache.lookup(epoch);
@@ -5524,7 +5611,10 @@ OSDMapRef OSDService::get_map(epoch_t epoch)
   if (epoch > 0) {
     dout(20) << "get_map " << epoch << " - loading and decoding " << map << dendl;
     bufferlist bl;
-    assert(_get_map_bl(epoch, bl));
+    if (!_get_map_bl(epoch, bl)) {
+      delete map;
+      return OSDMapRef();
+    }
     map->decode(bl);
   } else {
     dout(20) << "get_map " << epoch << " - return initial " << map << dendl;
diff --git a/src/osd/OSD.h b/src/osd/OSD.h
index 04ad4dcd7d7..5bcff7442d7 100644
--- a/src/osd/OSD.h
+++ b/src/osd/OSD.h
@@ -441,7 +441,12 @@ public:
   SimpleLRU<epoch_t, bufferlist> map_bl_cache;
   SimpleLRU<epoch_t, bufferlist> map_bl_inc_cache;
 
-  OSDMapRef get_map(epoch_t e);
+  OSDMapRef try_get_map(epoch_t e);
+  OSDMapRef get_map(epoch_t e) {
+    OSDMapRef ret(try_get_map(e));
+    assert(ret);
+    return ret;
+  }
   OSDMapRef add_map(OSDMap *o) {
     Mutex::Locker l(map_cache_lock);
     return _add_map(o);
@@ -617,7 +622,7 @@ protected:
   // asok
   friend class OSDSocketHook;
   class OSDSocketHook *asok_hook;
-  bool asok_command(string command, string args, ostream& ss);
+  bool asok_command(string command, string args, string format, ostream& ss);
 
 public:
   ClassHandler  *class_handler;
diff --git a/src/osd/OpRequest.cc b/src/osd/OpRequest.cc
index 3b8a8714d92..a6cdc9ecffb 100644
--- a/src/osd/OpRequest.cc
+++ b/src/osd/OpRequest.cc
@@ -76,31 +76,27 @@ void OpHistory::dump_ops(utime_t now, Formatter *f)
   f->close_section();
 }
 
-void OpTracker::dump_historic_ops(ostream &ss)
+void OpTracker::dump_historic_ops(Formatter *f)
 {
-  JSONFormatter jf(true);
   Mutex::Locker locker(ops_in_flight_lock);
   utime_t now = ceph_clock_now(g_ceph_context);
-  history.dump_ops(now, &jf);
-  jf.flush(ss);
+  history.dump_ops(now, f);
 }
 
-void OpTracker::dump_ops_in_flight(ostream &ss)
+void OpTracker::dump_ops_in_flight(Formatter *f)
 {
-  JSONFormatter jf(true);
   Mutex::Locker locker(ops_in_flight_lock);
-  jf.open_object_section("ops_in_flight"); // overall dump
-  jf.dump_int("num_ops", ops_in_flight.size());
-  jf.open_array_section("ops"); // list of OpRequests
+  f->open_object_section("ops_in_flight"); // overall dump
+  f->dump_int("num_ops", ops_in_flight.size());
+  f->open_array_section("ops"); // list of OpRequests
   utime_t now = ceph_clock_now(g_ceph_context);
   for (xlist<OpRequest*>::iterator p = ops_in_flight.begin(); !p.end(); ++p) {
-    jf.open_object_section("op");
-    (*p)->dump(now, &jf);
-    jf.close_section(); // this OpRequest
+    f->open_object_section("op");
+    (*p)->dump(now, f);
+    f->close_section(); // this OpRequest
   }
-  jf.close_section(); // list of OpRequests
-  jf.close_section(); // overall dump
-  jf.flush(ss);
+  f->close_section(); // list of OpRequests
+  f->close_section(); // overall dump
 }
 
 void OpTracker::register_inflight_op(xlist<OpRequest*>::item *i)
diff --git a/src/osd/OpRequest.h b/src/osd/OpRequest.h
index 47b050b8538..a2014472432 100644
--- a/src/osd/OpRequest.h
+++ b/src/osd/OpRequest.h
@@ -59,8 +59,8 @@ class OpTracker {
 
 public:
   OpTracker() : seq(0), ops_in_flight_lock("OpTracker mutex") {}
-  void dump_ops_in_flight(std::ostream& ss);
-  void dump_historic_ops(std::ostream& ss);
+  void dump_ops_in_flight(Formatter *f);
+  void dump_historic_ops(Formatter *f);
   void register_inflight_op(xlist<OpRequest*>::item *i);
   void unregister_inflight_op(OpRequest *i);
 
diff --git a/src/osd/PG.cc b/src/osd/PG.cc
index 7373357db11..9f957b8e054 100644
--- a/src/osd/PG.cc
+++ b/src/osd/PG.cc
@@ -5032,7 +5032,6 @@ void PG::handle_advance_map(OSDMapRef osdmap, OSDMapRef lastmap,
 			    vector<int>& newup, vector<int>& newacting,
 			    RecoveryCtx *rctx)
 {
-  assert(osdmap->get_epoch() == (lastmap->get_epoch() + 1));
   assert(lastmap->get_epoch() == osdmap_ref->get_epoch());
   assert(lastmap == osdmap_ref);
   dout(10) << "handle_advance_map " << newup << "/" << newacting << dendl;
diff --git a/src/osd/PG.h b/src/osd/PG.h
index 819c9c62f62..10e9a2544a9 100644
--- a/src/osd/PG.h
+++ b/src/osd/PG.h
@@ -43,8 +43,10 @@
 #include "msg/Messenger.h"
 #include "messages/MOSDRepScrub.h"
 #include "messages/MOSDPGLog.h"
+#include "common/cmdparse.h"
 #include "common/tracked_int_ptr.hpp"
 #include "common/WorkQueue.h"
+#include "include/str_list.h"
 
 #include <list>
 #include <memory>
@@ -108,10 +110,36 @@ struct PGRecoveryStats {
 	  << i.total_time << "\t"
 	  << i.min_time << "\t" << i.max_time << "\t"
 	  << p->first << "\n";
-	       
     }
   }
 
+  void dump_formatted(Formatter *f) {
+    Mutex::Locker l(lock);
+    f->open_array_section("pg_recovery_stats");
+    for (map<const char *,per_state_info>::iterator p = info.begin();
+	 p != info.end(); ++p) {
+      per_state_info& i = p->second;
+      f->open_object_section("recovery_state");
+      f->dump_int("enter", i.enter);
+      f->dump_int("exit", i.exit);
+      f->dump_int("events", i.events);
+      f->dump_stream("event_time") << i.event_time;
+      f->dump_stream("total_time") << i.total_time;
+      f->dump_stream("min_time") << i.min_time;
+      f->dump_stream("max_time") << i.max_time;
+      vector<string> states;
+      get_str_vec(p->first, "/", states);
+      f->open_array_section("nested_states");
+      for (vector<string>::iterator st = states.begin();
+	   st != states.end(); ++st) {
+	f->dump_string("state", *st);
+      }
+      f->close_section();
+      f->close_section();
+    }
+    f->close_section();
+  }
+
   void log_enter(const char *s) {
     Mutex::Locker l(lock);
     info[s].enter++;
@@ -1786,7 +1814,7 @@ public:
   virtual void do_push_reply(OpRequestRef op) = 0;
   virtual void snap_trimmer() = 0;
 
-  virtual int do_command(vector<string>& cmd, ostream& ss,
+  virtual int do_command(cmdmap_t cmdmap, ostream& ss,
 			 bufferlist& idata, bufferlist& odata) = 0;
 
   virtual bool same_for_read_since(epoch_t e) = 0;
diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc
index 298d38d6ace..658ea7cb746 100644
--- a/src/osd/ReplicatedPG.cc
+++ b/src/osd/ReplicatedPG.cc
@@ -268,57 +268,43 @@ int ReplicatedPG::get_pgls_filter(bufferlist::iterator& iter, PGLSFilter **pfilt
 
 // ==========================================================
 
-int ReplicatedPG::do_command(vector<string>& cmd, ostream& ss,
+int ReplicatedPG::do_command(cmdmap_t cmdmap, ostream& ss,
 			     bufferlist& idata, bufferlist& odata)
 {
   const pg_missing_t &missing = pg_log.get_missing();
-  map<string, cmd_vartype> cmdmap;
   string prefix;
+  string format;
 
-  if (cmd.empty()) {
-    ss << "no command given";
-    return -EINVAL;
-  }
-
-  stringstream ss2;
-  if (!cmdmap_from_json(cmd, &cmdmap, ss2)) {
-    ss << ss2.str();
-    return -EINVAL;
-  }
-
-  cmd_getval(g_ceph_context, cmdmap, "prefix", prefix);
-  if (prefix != "pg") {
-    ss << "ReplicatedPG::do_command: not pg command";
-    return -EINVAL;
-  }
+  cmd_getval(g_ceph_context, cmdmap, "format", format);
+  boost::scoped_ptr<Formatter> f(new_formatter(format));
+  // demand that we have a formatter
+  if (!f)
+    f.reset(new_formatter("json"));
 
   string command;
   cmd_getval(g_ceph_context, cmdmap, "cmd", command);
   if (command == "query") {
-    JSONFormatter jsf(true);
-    jsf.open_object_section("pg");
-    jsf.dump_string("state", pg_state_string(get_state()));
-    jsf.dump_unsigned("epoch", get_osdmap()->get_epoch());
-    jsf.open_array_section("up");
+    f->open_object_section("pg");
+    f->dump_string("state", pg_state_string(get_state()));
+    f->dump_unsigned("epoch", get_osdmap()->get_epoch());
+    f->open_array_section("up");
     for (vector<int>::iterator p = up.begin(); p != up.end(); ++p)
-      jsf.dump_unsigned("osd", *p);
-    jsf.close_section();
-    jsf.open_array_section("acting");
+      f->dump_unsigned("osd", *p);
+    f->close_section();
+    f->open_array_section("acting");
     for (vector<int>::iterator p = acting.begin(); p != acting.end(); ++p)
-      jsf.dump_unsigned("osd", *p);
-    jsf.close_section();
-    jsf.open_object_section("info");
-    info.dump(&jsf);
-    jsf.close_section();
-
-    jsf.open_array_section("recovery_state");
-    handle_query_state(&jsf);
-    jsf.close_section();
-
-    jsf.close_section();
-    stringstream dss;
-    jsf.flush(dss);
-    odata.append(dss);
+      f->dump_unsigned("osd", *p);
+    f->close_section();
+    f->open_object_section("info");
+    info.dump(f.get());
+    f->close_section();
+
+    f->open_array_section("recovery_state");
+    handle_query_state(f.get());
+    f->close_section();
+
+    f->close_section();
+    f->flush(odata);
     return 0;
   }
   else if (command == "mark_unfound_lost") {
@@ -352,7 +338,6 @@ int ReplicatedPG::do_command(vector<string>& cmd, ostream& ss,
     return 0;
   }
   else if (command == "list_missing") {
-    JSONFormatter jf(true);
     hobject_t offset;
     string offset_json;
     if (cmd_getval(g_ceph_context, cmdmap, "offset", offset_json)) {
@@ -366,50 +351,48 @@ int ReplicatedPG::do_command(vector<string>& cmd, ostream& ss,
 	return -EINVAL;
       }
     }
-    jf.open_object_section("missing");
+    f->open_object_section("missing");
     {
-      jf.open_object_section("offset");
-      offset.dump(&jf);
-      jf.close_section();
+      f->open_object_section("offset");
+      offset.dump(f.get());
+      f->close_section();
     }
-    jf.dump_int("num_missing", missing.num_missing());
-    jf.dump_int("num_unfound", get_num_unfound());
+    f->dump_int("num_missing", missing.num_missing());
+    f->dump_int("num_unfound", get_num_unfound());
     map<hobject_t,pg_missing_t::item>::const_iterator p = missing.missing.upper_bound(offset);
     {
-      jf.open_array_section("objects");
+      f->open_array_section("objects");
       int32_t num = 0;
       bufferlist bl;
       while (p != missing.missing.end() && num < g_conf->osd_command_max_records) {
-	jf.open_object_section("object");
+	f->open_object_section("object");
 	{
-	  jf.open_object_section("oid");
-	  p->first.dump(&jf);
-	  jf.close_section();
+	  f->open_object_section("oid");
+	  p->first.dump(f.get());
+	  f->close_section();
 	}
-	p->second.dump(&jf);  // have, need keys
+	p->second.dump(f.get());  // have, need keys
 	{
-	  jf.open_array_section("locations");
+	  f->open_array_section("locations");
 	  map<hobject_t,set<int> >::iterator q = missing_loc.find(p->first);
 	  if (q != missing_loc.end())
 	    for (set<int>::iterator r = q->second.begin(); r != q->second.end(); ++r)
-	      jf.dump_int("osd", *r);
-	  jf.close_section();
+	      f->dump_int("osd", *r);
+	  f->close_section();
 	}
-	jf.close_section();
+	f->close_section();
 	++p;
 	num++;
       }
-      jf.close_section();
+      f->close_section();
     }
-    jf.dump_int("more", p != missing.missing.end());
-    jf.close_section();
-    stringstream jss;
-    jf.flush(jss);
-    odata.append(jss);
+    f->dump_int("more", p != missing.missing.end());
+    f->close_section();
+    f->flush(odata);
     return 0;
   };
 
-  ss << "unknown command " << cmd;
+  ss << "unknown pg command " << prefix;
   return -EINVAL;
 }
 
diff --git a/src/osd/ReplicatedPG.h b/src/osd/ReplicatedPG.h
index 9dafe23faa1..7b70b4381ea 100644
--- a/src/osd/ReplicatedPG.h
+++ b/src/osd/ReplicatedPG.h
@@ -17,6 +17,7 @@
 #include <boost/optional.hpp>
 
 #include "include/assert.h" 
+#include "common/cmdparse.h"
 
 #include "PG.h"
 #include "OSD.h"
@@ -930,7 +931,8 @@ public:
 	       const hobject_t& ioid);
   ~ReplicatedPG() {}
 
-  int do_command(vector<string>& cmd, ostream& ss, bufferlist& idata, bufferlist& odata);
+  int do_command(cmdmap_t cmdmap, ostream& ss, bufferlist& idata,
+		 bufferlist& odata);
 
   void do_op(OpRequestRef op);
   bool pg_op_must_wait(MOSDOp *op);
diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h
index 3a6db4d8315..ca3dcc192b0 100644
--- a/src/osd/osd_types.h
+++ b/src/osd/osd_types.h
@@ -1141,6 +1141,13 @@ struct pg_history_t {
   epoch_t last_epoch_clean;    // lower bound on last epoch the PG was completely clean.
   epoch_t last_epoch_split;    // as parent
   
+  /**
+   * In the event of a map discontinuity, same_*_since may reflect the first
+   * map the osd has seen in the new map sequence rather than the actual start
+   * of the interval.  This is ok since a discontinuity at epoch e means there
+   * must have been a clean interval between e and now and that we cannot be
+   * in the active set during the interval containing e.
+   */
   epoch_t same_up_since;       // same acting set since
   epoch_t same_interval_since;   // same acting AND up set since
   epoch_t same_primary_since;  // same primary at least back through this epoch.
diff --git a/src/osdc/Objecter.cc b/src/osdc/Objecter.cc
index 9933f853f8f..aefe74088e9 100644
--- a/src/osdc/Objecter.cc
+++ b/src/osdc/Objecter.cc
@@ -2166,154 +2166,154 @@ void Objecter::dump_active()
   }
 }
 
-void Objecter::dump_requests(Formatter& fmt) const
+void Objecter::dump_requests(Formatter *fmt) const
 {
   assert(client_lock.is_locked());
 
-  fmt.open_object_section("requests");
+  fmt->open_object_section("requests");
   dump_ops(fmt);
   dump_linger_ops(fmt);
   dump_pool_ops(fmt);
   dump_pool_stat_ops(fmt);
   dump_statfs_ops(fmt);
   dump_command_ops(fmt);
-  fmt.close_section(); // requests object
+  fmt->close_section(); // requests object
 }
 
-void Objecter::dump_ops(Formatter& fmt) const
+void Objecter::dump_ops(Formatter *fmt) const
 {
-  fmt.open_array_section("ops");
+  fmt->open_array_section("ops");
   for (map<tid_t,Op*>::const_iterator p = ops.begin();
        p != ops.end();
        ++p) {
     Op *op = p->second;
-    fmt.open_object_section("op");
-    fmt.dump_unsigned("tid", op->tid);
-    fmt.dump_stream("pg") << op->pgid;
-    fmt.dump_int("osd", op->session ? op->session->osd : -1);
-    fmt.dump_stream("last_sent") << op->stamp;
-    fmt.dump_int("attempts", op->attempts);
-    fmt.dump_stream("object_id") << op->oid;
-    fmt.dump_stream("object_locator") << op->oloc;
-    fmt.dump_stream("snapid") << op->snapid;
-    fmt.dump_stream("snap_context") << op->snapc;
-    fmt.dump_stream("mtime") << op->mtime;
-
-    fmt.open_array_section("osd_ops");
+    fmt->open_object_section("op");
+    fmt->dump_unsigned("tid", op->tid);
+    fmt->dump_stream("pg") << op->pgid;
+    fmt->dump_int("osd", op->session ? op->session->osd : -1);
+    fmt->dump_stream("last_sent") << op->stamp;
+    fmt->dump_int("attempts", op->attempts);
+    fmt->dump_stream("object_id") << op->oid;
+    fmt->dump_stream("object_locator") << op->oloc;
+    fmt->dump_stream("snapid") << op->snapid;
+    fmt->dump_stream("snap_context") << op->snapc;
+    fmt->dump_stream("mtime") << op->mtime;
+
+    fmt->open_array_section("osd_ops");
     for (vector<OSDOp>::const_iterator it = op->ops.begin();
 	 it != op->ops.end();
 	 ++it) {
-      fmt.dump_stream("osd_op") << *it;
+      fmt->dump_stream("osd_op") << *it;
     }
-    fmt.close_section(); // osd_ops array
+    fmt->close_section(); // osd_ops array
 
-    fmt.close_section(); // op object
+    fmt->close_section(); // op object
   }
-  fmt.close_section(); // ops array
+  fmt->close_section(); // ops array
 }
 
-void Objecter::dump_linger_ops(Formatter& fmt) const
+void Objecter::dump_linger_ops(Formatter *fmt) const
 {
-  fmt.open_array_section("linger_ops");
+  fmt->open_array_section("linger_ops");
   for (map<uint64_t, LingerOp*>::const_iterator p = linger_ops.begin();
        p != linger_ops.end();
        ++p) {
     LingerOp *op = p->second;
-    fmt.open_object_section("linger_op");
-    fmt.dump_unsigned("linger_id", op->linger_id);
-    fmt.dump_stream("pg") << op->pgid;
-    fmt.dump_int("osd", op->session ? op->session->osd : -1);
-    fmt.dump_stream("object_id") << op->oid;
-    fmt.dump_stream("object_locator") << op->oloc;
-    fmt.dump_stream("snapid") << op->snap;
-    fmt.dump_stream("registering") << op->snap;
-    fmt.dump_stream("registered") << op->snap;
-    fmt.close_section(); // linger_op object
+    fmt->open_object_section("linger_op");
+    fmt->dump_unsigned("linger_id", op->linger_id);
+    fmt->dump_stream("pg") << op->pgid;
+    fmt->dump_int("osd", op->session ? op->session->osd : -1);
+    fmt->dump_stream("object_id") << op->oid;
+    fmt->dump_stream("object_locator") << op->oloc;
+    fmt->dump_stream("snapid") << op->snap;
+    fmt->dump_stream("registering") << op->snap;
+    fmt->dump_stream("registered") << op->snap;
+    fmt->close_section(); // linger_op object
   }
-  fmt.close_section(); // linger_ops array
+  fmt->close_section(); // linger_ops array
 }
 
-void Objecter::dump_command_ops(Formatter& fmt) const
+void Objecter::dump_command_ops(Formatter *fmt) const
 {
-  fmt.open_array_section("command_ops");
+  fmt->open_array_section("command_ops");
   for (map<uint64_t, CommandOp*>::const_iterator p = command_ops.begin();
        p != command_ops.end();
        ++p) {
     CommandOp *op = p->second;
-    fmt.open_object_section("command_op");
-    fmt.dump_unsigned("command_id", op->tid);
-    fmt.dump_int("osd", op->session ? op->session->osd : -1);
-    fmt.open_array_section("command");
+    fmt->open_object_section("command_op");
+    fmt->dump_unsigned("command_id", op->tid);
+    fmt->dump_int("osd", op->session ? op->session->osd : -1);
+    fmt->open_array_section("command");
     for (vector<string>::const_iterator q = op->cmd.begin(); q != op->cmd.end(); ++q)
-      fmt.dump_string("word", *q);
-    fmt.close_section();
+      fmt->dump_string("word", *q);
+    fmt->close_section();
     if (op->target_osd >= 0)
-      fmt.dump_int("target_osd", op->target_osd);
+      fmt->dump_int("target_osd", op->target_osd);
     else
-      fmt.dump_stream("target_pg") << op->target_pg;
-    fmt.close_section(); // command_op object
+      fmt->dump_stream("target_pg") << op->target_pg;
+    fmt->close_section(); // command_op object
   }
-  fmt.close_section(); // command_ops array
+  fmt->close_section(); // command_ops array
 }
 
-void Objecter::dump_pool_ops(Formatter& fmt) const
+void Objecter::dump_pool_ops(Formatter *fmt) const
 {
-  fmt.open_array_section("pool_ops");
+  fmt->open_array_section("pool_ops");
   for (map<tid_t, PoolOp*>::const_iterator p = pool_ops.begin();
        p != pool_ops.end();
        ++p) {
     PoolOp *op = p->second;
-    fmt.open_object_section("pool_op");
-    fmt.dump_unsigned("tid", op->tid);
-    fmt.dump_int("pool", op->pool);
-    fmt.dump_string("name", op->name);
-    fmt.dump_int("operation_type", op->pool_op);
-    fmt.dump_unsigned("auid", op->auid);
-    fmt.dump_unsigned("crush_rule", op->crush_rule);
-    fmt.dump_stream("snapid") << op->snapid;
-    fmt.dump_stream("last_sent") << op->last_submit;
-    fmt.close_section(); // pool_op object
+    fmt->open_object_section("pool_op");
+    fmt->dump_unsigned("tid", op->tid);
+    fmt->dump_int("pool", op->pool);
+    fmt->dump_string("name", op->name);
+    fmt->dump_int("operation_type", op->pool_op);
+    fmt->dump_unsigned("auid", op->auid);
+    fmt->dump_unsigned("crush_rule", op->crush_rule);
+    fmt->dump_stream("snapid") << op->snapid;
+    fmt->dump_stream("last_sent") << op->last_submit;
+    fmt->close_section(); // pool_op object
   }
-  fmt.close_section(); // pool_ops array
+  fmt->close_section(); // pool_ops array
 }
 
-void Objecter::dump_pool_stat_ops(Formatter& fmt) const
+void Objecter::dump_pool_stat_ops(Formatter *fmt) const
 {
-  fmt.open_array_section("pool_stat_ops");
+  fmt->open_array_section("pool_stat_ops");
   for (map<tid_t, PoolStatOp*>::const_iterator p = poolstat_ops.begin();
        p != poolstat_ops.end();
        ++p) {
     PoolStatOp *op = p->second;
-    fmt.open_object_section("pool_stat_op");
-    fmt.dump_unsigned("tid", op->tid);
-    fmt.dump_stream("last_sent") << op->last_submit;
+    fmt->open_object_section("pool_stat_op");
+    fmt->dump_unsigned("tid", op->tid);
+    fmt->dump_stream("last_sent") << op->last_submit;
 
-    fmt.open_array_section("pools");
+    fmt->open_array_section("pools");
     for (list<string>::const_iterator it = op->pools.begin();
 	 it != op->pools.end();
 	 ++it) {
-      fmt.dump_string("pool", *it);
+      fmt->dump_string("pool", *it);
     }
-    fmt.close_section(); // pool_op object
+    fmt->close_section(); // pool_op object
 
-    fmt.close_section(); // pool_stat_op object
+    fmt->close_section(); // pool_stat_op object
   }
-  fmt.close_section(); // pool_stat_ops array
+  fmt->close_section(); // pool_stat_ops array
 }
 
-void Objecter::dump_statfs_ops(Formatter& fmt) const
+void Objecter::dump_statfs_ops(Formatter *fmt) const
 {
-  fmt.open_array_section("statfs_ops");
+  fmt->open_array_section("statfs_ops");
   for (map<tid_t, StatfsOp*>::const_iterator p = statfs_ops.begin();
        p != statfs_ops.end();
        ++p) {
     StatfsOp *op = p->second;
-    fmt.open_object_section("statfs_op");
-    fmt.dump_unsigned("tid", op->tid);
-    fmt.dump_stream("last_sent") << op->last_submit;
-    fmt.close_section(); // pool_stat_op object
+    fmt->open_object_section("statfs_op");
+    fmt->dump_unsigned("tid", op->tid);
+    fmt->dump_stream("last_sent") << op->last_submit;
+    fmt->close_section(); // pool_stat_op object
   }
-  fmt.close_section(); // pool_stat_ops array
+  fmt->close_section(); // pool_stat_ops array
 }
 
 Objecter::RequestStateHook::RequestStateHook(Objecter *objecter) :
@@ -2321,14 +2321,16 @@ Objecter::RequestStateHook::RequestStateHook(Objecter *objecter) :
 {
 }
 
-bool Objecter::RequestStateHook::call(std::string command, std::string args, bufferlist& out)
+bool Objecter::RequestStateHook::call(std::string command, std::string args,
+				      std::string format, bufferlist& out)
 {
   stringstream ss;
-  JSONFormatter formatter(true);
+  Formatter *f = new_formatter(format);
   m_objecter->client_lock.Lock();
-  m_objecter->dump_requests(formatter);
+  m_objecter->dump_requests(f);
   m_objecter->client_lock.Unlock();
-  formatter.flush(ss);
+  f->flush(ss);
+  delete f;
   out.append(ss);
   return true;
 }
diff --git a/src/osdc/Objecter.h b/src/osdc/Objecter.h
index b593bef69d9..da5487f3a84 100644
--- a/src/osdc/Objecter.h
+++ b/src/osdc/Objecter.h
@@ -726,7 +726,8 @@ class Objecter {
     Objecter *m_objecter;
   public:
     RequestStateHook(Objecter *objecter);
-    bool call(std::string command, std::string args, bufferlist& out);
+    bool call(std::string command, std::string args, std::string format,
+	      bufferlist& out);
   };
 
   RequestStateHook *m_request_state_hook;
@@ -1233,13 +1234,13 @@ private:
    * Output in-flight requests
    */
   void dump_active();
-  void dump_requests(Formatter& fmt) const;
-  void dump_ops(Formatter& fmt) const;
-  void dump_linger_ops(Formatter& fmt) const;
-  void dump_command_ops(Formatter& fmt) const;
-  void dump_pool_ops(Formatter& fmt) const;
-  void dump_pool_stat_ops(Formatter& fmt) const;
-  void dump_statfs_ops(Formatter& fmt) const;
+  void dump_requests(Formatter *fmt) const;
+  void dump_ops(Formatter *fmt) const;
+  void dump_linger_ops(Formatter *fmt) const;
+  void dump_command_ops(Formatter *fmt) const;
+  void dump_pool_ops(Formatter *fmt) const;
+  void dump_pool_stat_ops(Formatter *fmt) const;
+  void dump_statfs_ops(Formatter *fmt) const;
 
   int get_client_incarnation() const { return client_inc; }
   void set_client_incarnation(int inc) { client_inc = inc; }
diff --git a/src/push_to_kclient.pl b/src/push_to_kclient.pl
deleted file mode 100755
index f76a2bb91f4..00000000000
--- a/src/push_to_kclient.pl
+++ /dev/null
@@ -1,32 +0,0 @@
-#!/usr/bin/perl
-
-use strict;
-
-my $usage = "./push_to_client.pl <path_to_kernel_git_tree>\n";
-
-my $kernel = shift @ARGV || die $usage;
-die $usage unless -d $kernel;
-die $usage unless -e "$kernel/fs/ceph/README";
-
-die "not in a git tree" unless `cd $kernel && git rev-parse HEAD`;
-
-my $dir = '.';
-until (-d "$dir/.git") {
-    $dir .= "/..";
-}
-
-print "pushing changed shared files from $dir to $kernel...\n";
-my @files = split(/\n/, `cat $kernel/fs/ceph/README`);
-for (@files) {
-    next if /^#/;
-    my ($orig, $new) = split(/\s+/, $_);
-    #print "$dir/$orig -> $new\n";
-    system "cp -v $dir/$orig $kernel/$new";
-}
-
-print "pulling changed shared files from $kernel to $dir...\n";
-system "cp -v $kernel/fs/ceph/ioctl.h $dir/src/client/ioctl.h";
-system "cp -v $kernel/fs/btrfs/ioctl.h $dir/src/os/btrfs_ioctl.h";
-
-print "done.\n";
-
diff --git a/src/pybind/ceph_argparse.py b/src/pybind/ceph_argparse.py
index 73d1115f645..9a9db6758a5 100644
--- a/src/pybind/ceph_argparse.py
+++ b/src/pybind/ceph_argparse.py
@@ -263,6 +263,8 @@ class CephIPAddr(CephArgtype):
         if p is not None and long(p) > 65535:
             raise ArgumentValid("{0} not a valid port number".format(p))
         self.val = s
+        self.addr = a
+        self.port = p
 
     def __str__(self):
         return '<IPaddr[:port]>'
@@ -274,6 +276,7 @@ class CephEntityAddr(CephIPAddr):
     def valid(self, s, partial=False):
         ip, nonce = s.split('/')
         super(self.__class__, self).valid(ip)
+        self.nonce = nonce
         self.val = s
 
     def __str__(self):
@@ -824,11 +827,10 @@ def validate(args, signature, partial=False):
         raise ArgumentError("unused arguments: " + str(myargs))
     return d
 
-def validate_command(parsed_args, sigdict, args, verbose=False):
+def validate_command(sigdict, args, verbose=False):
     """
     turn args into a valid dictionary ready to be sent off as JSON,
     validated against sigdict.
-    parsed_args is the namespace back from argparse
     """
     found = []
     valid_dict = {}
@@ -886,11 +888,51 @@ def validate_command(parsed_args, sigdict, args, verbose=False):
                     print >> sys.stderr, concise_sig(cmd['sig'])
             return None
 
-        if parsed_args.output_format:
-            valid_dict['format'] = parsed_args.output_format
-
         return valid_dict
 
+def find_cmd_target(childargs):
+    """
+    Using a minimal validation, figure out whether the command
+    should be sent to a monitor or an osd.  We do this before even
+    asking for the 'real' set of command signatures, so we can ask the
+    right daemon.
+    Returns ('osd', osdid), ('pg', pgid), or ('mon', '')
+    """
+    sig = parse_funcsig(['tell', {'name':'target', 'type':'CephName'}])
+    try:
+        valid_dict = validate(childargs, sig, partial=True)
+    except ArgumentError:
+        pass
+    else:
+        if len(valid_dict) == 2:
+            # revalidate to isolate type and id
+            name = CephName()
+            # if this fails, something is horribly wrong, as it just
+            # validated successfully above
+            name.valid(valid_dict['target'])
+            return name.nametype, name.nameid
+
+    sig = parse_funcsig(['tell', {'name':'pgid', 'type':'CephPgid'}])
+    try:
+        valid_dict = validate(childargs, sig, partial=True)
+    except ArgumentError:
+        pass
+    else:
+        if len(valid_dict) == 2:
+            # pg doesn't need revalidation; the string is fine
+            return 'pg', valid_dict['pgid']
+
+    sig = parse_funcsig(['pg', {'name':'pgid', 'type':'CephPgid'}])
+    try:
+        valid_dict = validate(childargs, sig, partial=True)
+    except ArgumentError:
+        pass
+    else:
+        if len(valid_dict) == 2:
+            return 'pg', valid_dict['pgid']
+
+    return 'mon', ''
+
 def send_command(cluster, target=('mon', ''), cmd=None, inbuf='', timeout=0, 
                  verbose=False):
     """
@@ -916,8 +958,15 @@ def send_command(cluster, target=('mon', ''), cmd=None, inbuf='', timeout=0,
                 cluster.osd_command(osdid, cmd, inbuf, timeout)
 
         elif target[0] == 'pg':
-            # leave it in cmddict for the OSD to use too
             pgid = target[1]
+            # pgid will already be in the command for the pg <pgid>
+            # form, but for tell <pgid>, we need to put it in
+            if cmd:
+                cmddict = json.loads(cmd[0])
+                cmddict['pgid'] = pgid
+            else:
+                cmddict = dict(pgid=pgid)
+            cmd = [json.dumps(cmddict)]
             if verbose:
                 print >> sys.stderr, 'submit {0} for pgid {1}'.\
                     format(cmd, pgid)
diff --git a/src/pybind/ceph_rest_api.py b/src/pybind/ceph_rest_api.py
index fdfe84ee3cb..59e3f60a3a7 100755
--- a/src/pybind/ceph_rest_api.py
+++ b/src/pybind/ceph_rest_api.py
@@ -1,12 +1,13 @@
 #!/usr/bin/python
 # vim: ts=4 sw=4 smarttab expandtab
 
-import os
 import collections
-import ConfigParser
+import contextlib
+import errno
 import json
 import logging
 import logging.handlers
+import os
 import rados
 import textwrap
 import xml.etree.ElementTree
@@ -16,16 +17,19 @@ import flask
 from ceph_argparse import *
 
 #
-# Globals
+# Globals and defaults
 #
 
-APPNAME = '__main__'
+DEFAULT_ADDR = '0.0.0.0'
+DEFAULT_PORT = '5000'
+DEFAULT_ID = 'restapi'
+
 DEFAULT_BASEURL = '/api/v0.1'
-DEFAULT_ADDR = '0.0.0.0:5000'
 DEFAULT_LOG_LEVEL = 'warning'
-DEFAULT_CLIENTNAME = 'client.restapi'
-DEFAULT_LOG_FILE = '/var/log/ceph/' + DEFAULT_CLIENTNAME + '.log'
+# default client name will be 'client.<DEFAULT_ID>'
 
+# 'app' must be global for decorators, etc.
+APPNAME = '__main__'
 app = flask.Flask(APPNAME)
 
 LOGLEVELS = {
@@ -36,130 +40,109 @@ LOGLEVELS = {
     'debug':logging.DEBUG,
 }
 
-# my globals, in a named tuple for usage clarity
-
-glob = collections.namedtuple('gvars', 'cluster urls sigdict baseurl')
-glob.cluster = None
-glob.urls = {}
-glob.sigdict = {}
-glob.baseurl = ''
-
-def load_conf(clustername='ceph', conffile=None):
-    import contextlib
-
-
-    class _TrimIndentFile(object):
-        def __init__(self, fp):
-            self.fp = fp
-
-        def readline(self):
-            line = self.fp.readline()
-            return line.lstrip(' \t')
-
-
-    def _optionxform(s):
-        s = s.replace('_', ' ')
-        s = '_'.join(s.split())
-        return s
-
-
-    def parse(fp):
-        cfg = ConfigParser.RawConfigParser()
-        cfg.optionxform = _optionxform
-        ifp = _TrimIndentFile(fp)
-        cfg.readfp(ifp)
-        return cfg
-
-
-    def load(path):
-        f = file(path)
-        with contextlib.closing(f):
-            return parse(f)
-
-    if conffile:
-        # from CEPH_CONF
-        return load(conffile)
-    else:
-        for path in [
-            '/etc/ceph/{0}.conf'.format(clustername),
-            os.path.expanduser('~/.ceph/{0}.conf'.format(clustername)),
-            '{0}.conf'.format(clustername),
-        ]:
-            if os.path.exists(path):
-                return load(path)
+def find_up_osd(app):
+    '''
+    Find an up OSD.  Return the last one that's up.
+    Returns id as an int.
+    '''
+    ret, outbuf, outs = json_command(app.ceph_cluster, prefix="osd dump",
+                                     argdict=dict(format='json'))
+    if ret:
+        raise EnvironmentError(ret, 'Can\'t get osd dump output')
+    try:
+        osddump = json.loads(outbuf)
+    except:
+        raise EnvironmentError(errno.EINVAL, 'Invalid JSON back from osd dump')
+    osds = [osd['osd'] for osd in osddump['osds'] if osd['up']]
+    if not osds:
+        raise EnvironmentError(errno.ENOENT, 'No up OSDs found')
+    return int(osds[-1])
 
-    raise EnvironmentError('No conf file found for "{0}"'.format(clustername))
 
-def get_conf(cfg, clientname, key):
-    try:
-        return cfg.get(clientname, 'restapi_' + key)
-    except ConfigParser.NoOptionError:
-        return None
+METHOD_DICT = {'r':['GET'], 'w':['PUT', 'DELETE']}
 
-# XXX this is done globally, and cluster connection kept open; there
-# are facilities to pass around global info to requests and to
-# tear down connections between requests if it becomes important
+def api_setup(app, conf, cluster, clientname, clientid, args):
+    '''
+    This is done globally, and cluster connection kept open for
+    the lifetime of the daemon.  librados should assure that even
+    if the cluster goes away and comes back, our connection remains.
 
-def api_setup():
-    """
     Initialize the running instance.  Open the cluster, get the command
-    signatures, module, perms, and help; stuff them away in the glob.urls
-    dict.
-    """
+    signatures, module, perms, and help; stuff them away in the app.ceph_urls
+    dict.  Also save app.ceph_sigdict for help() handling.
+    '''
+    def get_command_descriptions(cluster, target=('mon','')):
+        ret, outbuf, outs = json_command(cluster, target,
+                                         prefix='get_command_descriptions',
+                                         timeout=30)
+        if ret:
+            err = "Can't get command descriptions: {0}".format(outs)
+            app.logger.error(err)
+            raise EnvironmentError(ret, err)
 
-    conffile = os.environ.get('CEPH_CONF', '')
-    clustername = os.environ.get('CEPH_CLUSTER_NAME', 'ceph')
-    clientname = os.environ.get('CEPH_NAME', DEFAULT_CLIENTNAME)
-    try:
-        err = ''
-        cfg = load_conf(clustername, conffile)
-    except Exception as e:
-        err = "Can't load Ceph conf file: " + str(e)
-        app.logger.critical(err)
-        app.logger.critical("CEPH_CONF: %s", conffile)
-        app.logger.critical("CEPH_CLUSTER_NAME: %s", clustername)
-        raise EnvironmentError(err)
-
-    client_logfile = '/var/log/ceph' + clientname + '.log'
-
-    glob.cluster = rados.Rados(name=clientname, conffile=conffile)
-    glob.cluster.connect()
-
-    glob.baseurl = get_conf(cfg, clientname, 'base_url') or DEFAULT_BASEURL
-    if glob.baseurl.endswith('/'):
-        glob.baseurl
-    addr = get_conf(cfg, clientname, 'public_addr') or DEFAULT_ADDR
-    addrport = addr.rsplit(':', 1)
-    addr = addrport[0]
-    if len(addrport) > 1:
-        port = addrport[1]
-    else:
-        port = DEFAULT_ADDR.rsplit(':', 1)
+        try:
+            sigdict = parse_json_funcsigs(outbuf, 'rest')
+        except Exception as e:
+            err = "Can't parse command descriptions: {}".format(e)
+            app.logger.error(err)
+            raise EnvironmentError(err)
+        return sigdict
+
+    app.ceph_cluster = cluster or 'ceph'
+    app.ceph_urls = {}
+    app.ceph_sigdict = {}
+    app.ceph_baseurl = ''
+
+    conf = conf or ''
+    cluster = cluster or 'ceph'
+    clientid = clientid or DEFAULT_ID
+    clientname = clientname or 'client.' + clientid
+
+    app.ceph_cluster = rados.Rados(name=clientname, conffile=conf)
+    app.ceph_cluster.conf_parse_argv(args)
+    app.ceph_cluster.connect()
+
+    app.ceph_baseurl = app.ceph_cluster.conf_get('restapi_base_url') \
+         or DEFAULT_BASEURL
+    if app.ceph_baseurl.endswith('/'):
+        app.ceph_baseurl = app.ceph_baseurl[:-1]
+    addr = app.ceph_cluster.conf_get('public_addr') or DEFAULT_ADDR
+
+    # remove any nonce from the conf value
+    addr = addr.split('/')[0]
+    addr, port = addr.rsplit(':', 1)
+    addr = addr or DEFAULT_ADDR
+    port = port or DEFAULT_PORT
     port = int(port)
 
-    loglevel = get_conf(cfg, clientname, 'log_level') or DEFAULT_LOG_LEVEL
-    logfile = get_conf(cfg, clientname, 'log_file') or client_logfile
+    loglevel = app.ceph_cluster.conf_get('restapi_log_level') \
+        or DEFAULT_LOG_LEVEL
+    logfile = app.ceph_cluster.conf_get('log_file')
     app.logger.addHandler(logging.handlers.WatchedFileHandler(logfile))
     app.logger.setLevel(LOGLEVELS[loglevel.lower()])
     for h in app.logger.handlers:
         h.setFormatter(logging.Formatter(
             '%(asctime)s %(name)s %(levelname)s: %(message)s'))
 
-    ret, outbuf, outs = json_command(glob.cluster,
-                                     prefix='get_command_descriptions')
-    if ret:
-        err = "Can't contact cluster for command descriptions: {0}".format(outs)
-        app.logger.error(err)
-        raise EnvironmentError(ret, err)
-
-    try:
-        glob.sigdict = parse_json_funcsigs(outbuf, 'rest')
-    except Exception as e:
-        err = "Can't parse command descriptions: {}".format(e)
-        app.logger.error(err)
-        raise EnvironmentError(err)
-
-    # glob.sigdict maps "cmdNNN" to a dict containing:
+    app.ceph_sigdict = get_command_descriptions(app.ceph_cluster)
+
+    osdid = find_up_osd(app)
+    if osdid:
+        osd_sigdict = get_command_descriptions(app.ceph_cluster,
+                                               target=('osd', int(osdid)))
+
+        # shift osd_sigdict keys up to fit at the end of the mon's app.ceph_sigdict
+        maxkey = sorted(app.ceph_sigdict.keys())[-1]
+        maxkey = int(maxkey.replace('cmd', ''))
+        osdkey = maxkey + 1
+        for k, v in osd_sigdict.iteritems():
+            newv = v
+            newv['flavor'] = 'tell'
+            globk = 'cmd' + str(osdkey)
+            app.ceph_sigdict[globk] = newv
+            osdkey += 1
+
+    # app.ceph_sigdict maps "cmdNNN" to a dict containing:
     # 'sig', an array of argdescs
     # 'help', the helptext
     # 'module', the Ceph module this command relates to
@@ -167,94 +150,130 @@ def api_setup():
     #    a hint as to whether this is a GET or POST/PUT operation
     # 'avail', a comma-separated list of strings of consumers that should
     #    display this command (filtered by parse_json_funcsigs() above)
-    glob.urls = {}
-    for cmdnum, cmddict in glob.sigdict.iteritems():
+    app.ceph_urls = {}
+    for cmdnum, cmddict in app.ceph_sigdict.iteritems():
         cmdsig = cmddict['sig']
-        url, params = generate_url_and_params(cmdsig)
-        if url in glob.urls:
-            continue
+        flavor = cmddict.get('flavor', 'mon')
+        url, params = generate_url_and_params(app, cmdsig, flavor)
+        perm = cmddict['perm']
+        for k in METHOD_DICT.iterkeys():
+            if k in perm:
+                methods = METHOD_DICT[k]
+        urldict = {'paramsig':params,
+                   'help':cmddict['help'],
+                   'module':cmddict['module'],
+                   'perm':perm,
+                   'flavor':flavor,
+                   'methods':methods,
+                  }
+
+        # app.ceph_urls contains a list of urldicts (usually only one long)
+        if url not in app.ceph_urls:
+            app.ceph_urls[url] = [urldict]
         else:
-            perm = cmddict['perm']
-            urldict = {'paramsig':params,
-                       'help':cmddict['help'],
-                       'module':cmddict['module'],
-                       'perm':perm,
-                      }
-            method_dict = {'r':['GET'],
-                       'w':['PUT', 'DELETE']}
-            for k in method_dict.iterkeys():
-                if k in perm:
-                    methods = method_dict[k]
-            app.add_url_rule(url, url, handler, methods=methods)
-            glob.urls[url] = urldict
-
-            url += '.<fmt>'
-            app.add_url_rule(url, url, handler, methods=methods)
-            glob.urls[url] = urldict
-    app.logger.debug("urls added: %d", len(glob.urls))
+            # If more than one, need to make union of methods of all.
+            # Method must be checked in handler
+            methodset = set(methods)
+            for old_urldict in app.ceph_urls[url]:
+                methodset |= set(old_urldict['methods'])
+            methods = list(methodset)
+            app.ceph_urls[url].append(urldict)
+
+        # add, or re-add, rule with all methods and urldicts
+        app.add_url_rule(url, url, handler, methods=methods)
+        url += '.<fmt>'
+        app.add_url_rule(url, url, handler, methods=methods)
+
+    app.logger.debug("urls added: %d", len(app.ceph_urls))
 
     app.add_url_rule('/<path:catchall_path>', '/<path:catchall_path>',
                      handler, methods=['GET', 'PUT'])
     return addr, port
 
 
-def generate_url_and_params(sig):
-    """
+def generate_url_and_params(app, sig, flavor):
+    '''
     Digest command signature from cluster; generate an absolute
-    (including glob.baseurl) endpoint from all the prefix words,
-    and a dictionary of non-prefix parameters
-    """
+    (including app.ceph_baseurl) endpoint from all the prefix words,
+    and a list of non-prefix param descs
+    '''
 
     url = ''
     params = []
+    # the OSD command descriptors don't include the 'tell <target>', so
+    # tack it onto the front of sig
+    if flavor == 'tell':
+        tellsig = parse_funcsig(['tell',
+                                {'name':'target', 'type':'CephOsdName'}])
+        sig = tellsig + sig
+
     for desc in sig:
+        # prefixes go in the URL path
         if desc.t == CephPrefix:
             url += '/' + desc.instance.prefix
+        # CephChoices with 1 required string (not --) do too, unless
+        # we've already started collecting params, in which case they
+        # too are params
         elif desc.t == CephChoices and \
              len(desc.instance.strings) == 1 and \
              desc.req and \
-             not str(desc.instance).startswith('--'):
-                url += '/' + str(desc.instance)
+             not str(desc.instance).startswith('--') and \
+             not params:
+            url += '/' + str(desc.instance)
         else:
-            params.append(desc)
-    return glob.baseurl + url, params
+            # tell/<target> is a weird case; the URL includes what
+            # would everywhere else be a parameter
+            if flavor == 'tell' and  \
+              (desc.t, desc.name) == (CephOsdName, 'target'):
+                url += '/<target>'
+            else:
+                params.append(desc)
+
+    return app.ceph_baseurl + url, params
 
 
-def concise_sig_for_uri(sig):
-    """
+#
+# end setup (import-time) functions, begin request-time functions
+#
+
+def concise_sig_for_uri(sig, flavor):
+    '''
     Return a generic description of how one would send a REST request for sig
-    """
+    '''
     prefix = []
     args = []
+    ret = ''
+    if flavor == 'tell':
+        ret = 'tell/<osdid-or-pgid>/'
     for d in sig:
         if d.t == CephPrefix:
             prefix.append(d.instance.prefix)
         else:
             args.append(d.name + '=' + str(d))
-    sig = '/'.join(prefix)
+    ret += '/'.join(prefix)
     if args:
-        sig += '?' + '&'.join(args)
-    return sig
+        ret += '?' + '&'.join(args)
+    return ret
 
 def show_human_help(prefix):
-    """
+    '''
     Dump table showing commands matching prefix
-    """
-    # XXX this really needs to be a template
-    #s = '<html><body><style>.colhalf { width: 50%;} body{word-wrap:break-word;}</style>'
-    #s += '<table border=1><col class=colhalf /><col class=colhalf />'
-    #s += '<th>Possible commands:</th>'
-    # XXX the above mucking with css doesn't cause sensible columns.
+    '''
+    # XXX There ought to be a better discovery mechanism than an HTML table
     s = '<html><body><table border=1><th>Possible commands:</th><th>Method</th><th>Description</th>'
 
-    possible = []
     permmap = {'r':'GET', 'rw':'PUT'}
     line = ''
-    for cmdsig in sorted(glob.sigdict.itervalues(), cmp=descsort):
+    for cmdsig in sorted(app.ceph_sigdict.itervalues(), cmp=descsort):
         concise = concise_sig(cmdsig['sig'])
+        flavor = cmdsig.get('flavor', 'mon')
+        if flavor == 'tell':
+            concise = 'tell/<target>/' + concise
         if concise.startswith(prefix):
             line = ['<tr><td>']
-            wrapped_sig = textwrap.wrap(concise_sig_for_uri(cmdsig['sig']), 40)
+            wrapped_sig = textwrap.wrap(
+                concise_sig_for_uri(cmdsig['sig'], flavor), 40
+            )
             for sigline in wrapped_sig:
                 line.append(flask.escape(sigline) + '\n')
             line.append('</td><td>')
@@ -272,23 +291,22 @@ def show_human_help(prefix):
 
 @app.before_request
 def log_request():
-    """
+    '''
     For every request, log it.  XXX Probably overkill for production
-    """
+    '''
     app.logger.info(flask.request.url + " from " + flask.request.remote_addr + " " + flask.request.user_agent.string)
     app.logger.debug("Accept: %s", flask.request.accept_mimetypes.values())
 
-
 @app.route('/')
 def root_redir():
-    return flask.redirect(glob.baseurl)
+    return flask.redirect(app.ceph_baseurl)
 
 def make_response(fmt, output, statusmsg, errorcode):
-    """
+    '''
     If formatted output, cobble up a response object that contains the
     output and status wrapped in enclosing objects; if nonformatted, just
-    use output.  Return HTTP status errorcode in any event.
-    """
+    use output+status.  Return HTTP status errorcode in any event.
+    '''
     response = output
     if fmt:
         if 'json' in fmt:
@@ -300,6 +318,7 @@ def make_response(fmt, output, statusmsg, errorcode):
                 return flask.make_response("Error decoding JSON from " +
                                            output, 500)
         elif 'xml' in fmt:
+            # XXX
             # one is tempted to do this with xml.etree, but figuring out how
             # to 'un-XML' the XML-dumped output so it can be reassembled into
             # a piece of the tree here is beyond me right now.
@@ -319,22 +338,32 @@ def make_response(fmt, output, statusmsg, errorcode):
     {1}
   </status>
 </response>'''.format(response, xml.sax.saxutils.escape(statusmsg))
+    else:
+        if not 200 <= errorcode < 300:
+            response = response + '\n' + statusmsg + '\n'
 
     return flask.make_response(response, errorcode)
 
-def handler(catchall_path=None, fmt=None):
-    """
-    Main endpoint handler; generic for every endpoint
-    """
+def handler(catchall_path=None, fmt=None, target=None):
+    '''
+    Main endpoint handler; generic for every endpoint, including catchall.
+    Handles the catchall, anything with <.fmt>, anything with embedded
+    <target>.  Partial match or ?help cause the HTML-table
+    "show_human_help" output.  
+    '''
 
-    if (catchall_path):
-        ep = catchall_path.replace('.<fmt>', '')
-    else:
-        ep = flask.request.endpoint.replace('.<fmt>', '')
+    ep = catchall_path or flask.request.endpoint
+    ep = ep.replace('.<fmt>', '')
 
     if ep[0] != '/':
         ep = '/' + ep
 
+    # demand that endpoint begin with app.ceph_baseurl
+    if not ep.startswith(app.ceph_baseurl):
+        return make_response(fmt, '', 'Page not found', 404)
+
+    rel_ep = ep[len(app.ceph_baseurl)+1:]
+
     # Extensions override Accept: headers override defaults
     if not fmt:
         if 'application/json' in flask.request.accept_mimetypes.values():
@@ -342,15 +371,38 @@ def handler(catchall_path=None, fmt=None):
         elif 'application/xml' in flask.request.accept_mimetypes.values():
             fmt = 'xml'
 
-    # demand that endpoint begin with glob.baseurl
-    if not ep.startswith(glob.baseurl):
-        return make_response(fmt, '', 'Page not found', 404)
+    prefix = ''
+    pgid = None
+    cmdtarget = 'mon', ''
 
-    relative_endpoint = ep[len(glob.baseurl)+1:]
-    prefix = ' '.join(relative_endpoint.split('/')).strip()
+    if target:
+        # got tell/<target>; validate osdid or pgid
+        name = CephOsdName()
+        pgidobj = CephPgid()
+        try:
+            name.valid(target)
+        except ArgumentError:
+            # try pgid
+            try:
+                pgidobj.valid(target)
+            except ArgumentError:
+                return flask.make_response("invalid osdid or pgid", 400)
+            else:
+                # it's a pgid
+                pgid = pgidobj.val
+                cmdtarget = 'pg', pgid
+        else:
+            # it's an osd
+            cmdtarget = name.nametype, name.nameid
+
+        # prefix does not include tell/<target>/
+        prefix = ' '.join(rel_ep.split('/')[2:]).strip()
+    else:
+        # non-target command: prefix is entire path
+        prefix = ' '.join(rel_ep.split('/')).strip()
 
     # show "match as much as you gave me" help for unknown endpoints
-    if not ep in glob.urls:
+    if not ep in app.ceph_urls:
         helptext = show_human_help(prefix)
         if helptext:
             resp = flask.make_response(helptext, 400)
@@ -359,43 +411,59 @@ def handler(catchall_path=None, fmt=None):
         else:
             return make_response(fmt, '', 'Invalid endpoint ' + ep, 400)
 
-    urldict = glob.urls[ep]
-    paramsig = urldict['paramsig']
-
-    # allow '?help' for any specifically-known endpoint
-    if 'help' in flask.request.args:
-        response = flask.make_response('{0}: {1}'.\
-            format(prefix + concise_sig(paramsig), urldict['help']))
-        response.headers['Content-Type'] = 'text/plain'
-        return response
-
-    # if there are parameters for this endpoint, process them
-    if paramsig:
-        args = {}
-        for k, l in flask.request.args.iterlists():
-            if len(l) == 1:
-                args[k] = l[0]
-            else:
-                args[k] = l
-
-        # is this a valid set of params?
-        try:
-            argdict = validate(args, paramsig)
-        except Exception as e:
-            return make_response(fmt, '', str(e) + '\n', 400)
-    else:
-        # no parameters for this endpoint; complain if args are supplied
-        if flask.request.args:
-            return make_response(fmt, '', ep + 'takes no params', 400)
-        argdict = {}
+    found = None
+    exc = ''
+    for urldict in app.ceph_urls[ep]:
+        if flask.request.method not in urldict['methods']:
+            continue
+        paramsig = urldict['paramsig']
+
+        # allow '?help' for any specifically-known endpoint
+        if 'help' in flask.request.args:
+            response = flask.make_response('{0}: {1}'.\
+                format(prefix + concise_sig(paramsig), urldict['help']))
+            response.headers['Content-Type'] = 'text/plain'
+            return response
+
+        # if there are parameters for this endpoint, process them
+        if paramsig:
+            args = {}
+            for k, l in flask.request.args.iterlists():
+                if len(l) == 1:
+                    args[k] = l[0]
+                else:
+                    args[k] = l
+
+            # is this a valid set of params?
+            try:
+                argdict = validate(args, paramsig)
+                found = urldict
+                break
+            except Exception as e:
+                exc += str(e)
+                continue
+        else:
+            if flask.request.args:
+                continue
+            found = urldict
+            argdict = {}
+            break
 
+    if not found:
+        return make_response(fmt, '', exc + '\n', 400)
 
     argdict['format'] = fmt or 'plain'
-    argdict['module'] = urldict['module']
-    argdict['perm'] = urldict['perm']
+    argdict['module'] = found['module']
+    argdict['perm'] = found['perm']
+    if pgid:
+        argdict['pgid'] = pgid
+
+    if not cmdtarget:
+        cmdtarget = ('mon', '')
 
     app.logger.debug('sending command prefix %s argdict %s', prefix, argdict)
-    ret, outbuf, outs = json_command(glob.cluster, prefix=prefix,
+    ret, outbuf, outs = json_command(app.ceph_cluster, prefix=prefix,
+                                     target=cmdtarget,
                                      inbuf=flask.request.data, argdict=argdict)
     if ret:
         return make_response(fmt, '', 'Error: {0} ({1})'.format(outs, ret), 400)
@@ -408,4 +476,12 @@ def handler(catchall_path=None, fmt=None):
     response.headers['Content-Type'] = contenttype
     return response
 
-addr, port = api_setup()
+#
+# Main entry point from wrapper/WSGI server: call with cmdline args,
+# get back the WSGI app entry point
+#
+def generate_app(conf, cluster, clientname, clientid, args):
+    addr, port = api_setup(app, conf, cluster, clientname, clientid, args)
+    app.ceph_addr = addr
+    app.ceph_port = port
+    return app
diff --git a/src/pybind/cephfs.py b/src/pybind/cephfs.py
index f89f53fb194..80b7e4b773f 100644
--- a/src/pybind/cephfs.py
+++ b/src/pybind/cephfs.py
@@ -2,9 +2,7 @@
 This module is a thin wrapper around libcephfs.
 """
 from ctypes import CDLL, c_char_p, c_size_t, c_void_p, c_int, c_long, c_uint, c_ulong, \
-    create_string_buffer, byref, Structure, c_uint64, c_ubyte, pointer, \
-    CFUNCTYPE
-import ctypes
+    create_string_buffer, byref, Structure
 import errno
 
 class Error(Exception):
diff --git a/src/pybind/rados.py b/src/pybind/rados.py
index 13ce006f8ce..e543ff79305 100644
--- a/src/pybind/rados.py
+++ b/src/pybind/rados.py
@@ -278,7 +278,7 @@ Rados object in state %s." % (self.state))
         ret = run_in_thread(self.librados.rados_conf_parse_argv_remainder,
                             (self.cluster, len(args), cargs, cretargs))
         if ret:
-           raise make_ex("error calling conf_parse_argv_remainder")
+            raise make_ex(ret, "error calling conf_parse_argv_remainder")
 
         # cretargs was allocated with fixed length; collapse return
         # list to eliminate any missing args
diff --git a/src/pybind/rbd.py b/src/pybind/rbd.py
index 9d71738e728..6e9ca8a2252 100644
--- a/src/pybind/rbd.py
+++ b/src/pybind/rbd.py
@@ -17,7 +17,7 @@ methods, a :class:`TypeError` will be raised.
 # Copyright 2011 Josh Durgin
 from ctypes import CDLL, c_char, c_char_p, c_size_t, c_void_p, c_int, \
     create_string_buffer, byref, Structure, c_uint64, c_int64, c_uint8, \
-    CFUNCTYPE, pointer
+    CFUNCTYPE
 import ctypes
 import errno
 
diff --git a/src/rbd.cc b/src/rbd.cc
index c9b2f0a272c..7f90c1f118e 100644
--- a/src/rbd.cc
+++ b/src/rbd.cc
@@ -60,8 +60,6 @@
 #include <sys/param.h>
 #endif
 
-#include "include/fiemap.h"
-
 #define MAX_SECRET_LEN 1000
 #define MAX_POOL_NAME_SIZE 128
 
diff --git a/src/rgw/rgw_admin.cc b/src/rgw/rgw_admin.cc
index 67f5f1c68b3..364f60f78f7 100644
--- a/src/rgw/rgw_admin.cc
+++ b/src/rgw/rgw_admin.cc
@@ -1815,7 +1815,7 @@ next:
       cerr << "ERROR: failed to read input: " << cpp_strerror(-ret) << std::endl;
       return ret;
     }
-    ret = store->meta_mgr->put(metadata_key, bl);
+    ret = store->meta_mgr->put(metadata_key, bl, RGWMetadataHandler::APPLY_ALWAYS);
     if (ret < 0) {
       cerr << "ERROR: can't put key: " << cpp_strerror(-ret) << std::endl;
       return -ret;
diff --git a/src/rgw/rgw_bucket.cc b/src/rgw/rgw_bucket.cc
index 8de5a3d101f..bf8da99d616 100644
--- a/src/rgw/rgw_bucket.cc
+++ b/src/rgw/rgw_bucket.cc
@@ -1397,7 +1397,8 @@ public:
     return 0;
   }
 
-  int put(RGWRados *store, string& entry, RGWObjVersionTracker& objv_tracker, time_t mtime, JSONObj *obj) {
+  int put(RGWRados *store, string& entry, RGWObjVersionTracker& objv_tracker,
+          time_t mtime, JSONObj *obj, sync_type_t sync_type) {
     RGWBucketEntryPoint be, old_be;
     decode_json_obj(be, obj);
 
@@ -1410,6 +1411,12 @@ public:
     if (ret < 0 && ret != -ENOENT)
       return ret;
 
+    // are we actually going to perform this put, or is it too old?
+    if (!check_versions(old_ot.read_version, orig_mtime,
+			objv_tracker.write_version, mtime, sync_type)) {
+      return STATUS_NO_APPLY;
+    }
+
     objv_tracker.read_version = old_ot.read_version; /* maintain the obj version we just read */
 
     ret = store->put_bucket_entrypoint_info(entry, be, false, objv_tracker, mtime, &attrs);
@@ -1540,7 +1547,8 @@ public:
     return 0;
   }
 
-  int put(RGWRados *store, string& oid, RGWObjVersionTracker& objv_tracker, time_t mtime, JSONObj *obj) {
+  int put(RGWRados *store, string& oid, RGWObjVersionTracker& objv_tracker,
+          time_t mtime, JSONObj *obj, sync_type_t sync_type) {
     RGWBucketCompleteInfo bci, old_bci;
     decode_json_obj(bci, obj);
 
@@ -1566,6 +1574,13 @@ public:
       bci.info.bucket.index_pool = old_bci.info.bucket.index_pool;
     }
 
+    // are we actually going to perform this put, or is it too old?
+    if (!check_versions(old_bci.info.objv_tracker.read_version, orig_mtime,
+			objv_tracker.write_version, mtime, sync_type)) {
+      objv_tracker.read_version = old_bci.info.objv_tracker.read_version;
+      return STATUS_NO_APPLY;
+    }
+
     /* record the read version (if any), store the new version */
     bci.info.objv_tracker.read_version = old_bci.info.objv_tracker.read_version;
     bci.info.objv_tracker.write_version = objv_tracker.write_version;
@@ -1580,7 +1595,7 @@ public:
     if (ret < 0)
       return ret;
 
-    return 0;
+    return STATUS_APPLIED;
   }
 
   struct list_keys_info {
diff --git a/src/rgw/rgw_common.h b/src/rgw/rgw_common.h
index 7f224a798f5..543bdf21377 100644
--- a/src/rgw/rgw_common.h
+++ b/src/rgw/rgw_common.h
@@ -99,6 +99,8 @@ using ceph::crypto::MD5;
 #define STATUS_NO_CONTENT        1902
 #define STATUS_PARTIAL_CONTENT   1903
 #define STATUS_REDIRECT          1904
+#define STATUS_NO_APPLY          1905
+#define STATUS_APPLIED           1906
 
 #define ERR_INVALID_BUCKET_NAME  2000
 #define ERR_INVALID_OBJECT_NAME  2001
diff --git a/src/rgw/rgw_formats.h b/src/rgw/rgw_formats.h
index 0ae917fe7d1..3653ef4f480 100644
--- a/src/rgw/rgw_formats.h
+++ b/src/rgw/rgw_formats.h
@@ -36,6 +36,9 @@ public:
   virtual void dump_string(const char *name, std::string s);
   virtual std::ostream& dump_stream(const char *name);
   virtual void dump_format(const char *name, const char *fmt, ...);
+  virtual void dump_format_unquoted(const char *name, const char *fmt, ...) {
+    assert(0 == "not implemented");
+  }
   virtual int get_len() const;
   virtual void write_raw_data(const char *data);
 
diff --git a/src/rgw/rgw_metadata.cc b/src/rgw/rgw_metadata.cc
index ac8c703f5e0..6da1ff5ab24 100644
--- a/src/rgw/rgw_metadata.cc
+++ b/src/rgw/rgw_metadata.cc
@@ -194,7 +194,8 @@ public:
   virtual string get_type() { return string(); }
 
   virtual int get(RGWRados *store, string& entry, RGWMetadataObject **obj) { return -ENOTSUP; }
-  virtual int put(RGWRados *store, string& entry, RGWObjVersionTracker& objv_tracker, time_t mtime, JSONObj *obj) { return -ENOTSUP; }
+  virtual int put(RGWRados *store, string& entry, RGWObjVersionTracker& objv_tracker,
+                  time_t mtime, JSONObj *obj, sync_type_t sync_type) { return -ENOTSUP; }
 
   virtual void get_pool_and_oid(RGWRados *store, const string& key, rgw_bucket& bucket, string& oid) {}
 
@@ -242,6 +243,7 @@ RGWMetadataManager::~RGWMetadataManager()
   }
 
   handlers.clear();
+  delete md_log;
 }
 
 int RGWMetadataManager::register_handler(RGWMetadataHandler *handler)
@@ -328,7 +330,9 @@ int RGWMetadataManager::get(string& metadata_key, Formatter *f)
   return 0;
 }
 
-int RGWMetadataManager::put(string& metadata_key, bufferlist& bl)
+int RGWMetadataManager::put(string& metadata_key, bufferlist& bl,
+                            RGWMetadataHandler::sync_type_t sync_type,
+                            obj_version *existing_version)
 {
   RGWMetadataHandler *handler;
   string entry;
@@ -357,7 +361,11 @@ int RGWMetadataManager::put(string& metadata_key, bufferlist& bl)
     return -EINVAL;
   }
 
-  return handler->put(store, entry, objv_tracker, mtime, jo);
+  ret = handler->put(store, entry, objv_tracker, mtime, jo, sync_type);
+  if (existing_version) {
+    *existing_version = objv_tracker.read_version;
+  }
+  return ret;
 }
 
 int RGWMetadataManager::remove(string& metadata_key)
@@ -505,15 +513,15 @@ int RGWMetadataManager::pre_modify(RGWMetadataHandler *handler, string& section,
   /* if write version has not been set, and there's a read version, set it so that we can
    * log it
    */
-  if (objv_tracker && objv_tracker->read_version.ver &&
-      !objv_tracker->write_version.ver) {
-    objv_tracker->write_version = objv_tracker->read_version;
-    objv_tracker->write_version.ver++;
+  if (objv_tracker) {
+    if (objv_tracker->read_version.ver && !objv_tracker->write_version.ver) {
+      objv_tracker->write_version = objv_tracker->read_version;
+      objv_tracker->write_version.ver++;
+    }
+    log_data.read_version = objv_tracker->read_version;
+    log_data.write_version = objv_tracker->write_version;
   }
 
-  log_data.read_version = objv_tracker->read_version;
-  log_data.write_version = objv_tracker->write_version;
-
   log_data.status = op_type;
 
   bufferlist logbl;
diff --git a/src/rgw/rgw_metadata.h b/src/rgw/rgw_metadata.h
index 2cc9110191a..50649b6f901 100644
--- a/src/rgw/rgw_metadata.h
+++ b/src/rgw/rgw_metadata.h
@@ -44,14 +44,30 @@ class RGWMetadataManager;
 class RGWMetadataHandler {
   friend class RGWMetadataManager;
 
-protected:
-  virtual void get_pool_and_oid(RGWRados *store, const string& key, rgw_bucket& bucket, string& oid) = 0;
 public:
+  enum sync_type_t {
+    APPLY_ALWAYS,
+    APPLY_UPDATES,
+    APPLY_NEWER
+  };
+  static bool string_to_sync_type(const string& sync_string,
+                                  sync_type_t& type) {
+    if (sync_string.compare("update-by-version") == 0)
+      type = APPLY_UPDATES;
+    else if (sync_string.compare("update-by-timestamp") == 0)
+      type = APPLY_NEWER;
+    else if (sync_string.compare("always") == 0)
+      type = APPLY_ALWAYS;
+    else
+      return false;
+    return true;
+  }
   virtual ~RGWMetadataHandler() {}
   virtual string get_type() = 0;
 
   virtual int get(RGWRados *store, string& entry, RGWMetadataObject **obj) = 0;
-  virtual int put(RGWRados *store, string& entry, RGWObjVersionTracker& objv_tracker, time_t mtime, JSONObj *obj) = 0;
+  virtual int put(RGWRados *store, string& entry, RGWObjVersionTracker& objv_tracker,
+                  time_t mtime, JSONObj *obj, sync_type_t type) = 0;
   virtual int remove(RGWRados *store, string& entry, RGWObjVersionTracker& objv_tracker) = 0;
 
   virtual int list_keys_init(RGWRados *store, void **phandle) = 0;
@@ -62,6 +78,33 @@ public:
   virtual void get_hash_key(const string& section, const string& key, string& hash_key) {
     hash_key = section + ":" + key;
   }
+
+protected:
+  virtual void get_pool_and_oid(RGWRados *store, const string& key, rgw_bucket& bucket, string& oid) = 0;
+  /**
+   * Compare an incoming versus on-disk tag/version+mtime combo against
+   * the sync mode to see if the new one should replace the on-disk one.
+   *
+   * @return true if the update should proceed, false otherwise.
+   */
+  bool check_versions(const obj_version& ondisk, const time_t& ondisk_time,
+                      const obj_version& incoming, const time_t& incoming_time,
+                      sync_type_t sync_mode) {
+    switch (sync_mode) {
+    case APPLY_UPDATES:
+      if ((ondisk.tag != incoming.tag) ||
+	  (ondisk.ver >= incoming.ver))
+	return false;
+      break;
+    case APPLY_NEWER:
+      if (ondisk_time >= incoming_time)
+	return false;
+      break;
+    case APPLY_ALWAYS: //deliberate fall-thru -- we always apply!
+    default: break;
+    }
+    return true;
+  }
 };
 
 #define META_LOG_OBJ_PREFIX "meta.log."
@@ -86,9 +129,7 @@ class RGWMetadataLog {
   }
 
 public:
-  RGWMetadataLog(CephContext *_cct, RGWRados *_store) : cct(_cct), store(_store) {
-    prefix = META_LOG_OBJ_PREFIX;
-  }
+  RGWMetadataLog(CephContext *_cct, RGWRados *_store) : cct(_cct), store(_store), prefix(META_LOG_OBJ_PREFIX) {}
 
   int add_entry(RGWRados *store, RGWMetadataHandler *handler, const string& section, const string& key, bufferlist& bl);
 
@@ -102,7 +143,7 @@ public:
 
     bool done;
 
-    LogListCtx() : done(false) {}
+    LogListCtx() : cur_shard(0), done(false) {}
   };
 
   void init_list_entries(int shard_id, utime_t& from_time, utime_t& end_time, string& marker, void **handle);
@@ -152,7 +193,9 @@ public:
                 map<string, bufferlist>* rmattrs,
                 RGWObjVersionTracker *objv_tracker);
   int get(string& metadata_key, Formatter *f);
-  int put(string& metadata_key, bufferlist& bl);
+  int put(string& metadata_key, bufferlist& bl,
+          RGWMetadataHandler::sync_type_t sync_mode,
+          obj_version *existing_version = NULL);
   int remove(string& metadata_key);
 
   int list_keys_init(string& section, void **phandle);
diff --git a/src/rgw/rgw_rest_log.cc b/src/rgw/rgw_rest_log.cc
index 544adbe7965..7b50986cb37 100644
--- a/src/rgw/rgw_rest_log.cc
+++ b/src/rgw/rgw_rest_log.cc
@@ -88,6 +88,8 @@ void RGWOp_MDLog_List::execute() {
     if (!max_entries_str.empty()) 
       max_entries -= entries.size();
   } while (truncated && (max_entries > 0));
+
+  meta_log->complete_list_entries(handle);
 }
 
 void RGWOp_MDLog_List::send_response() {
diff --git a/src/rgw/rgw_rest_metadata.cc b/src/rgw/rgw_rest_metadata.cc
index 35ec0ab9b04..0705a46ed6c 100644
--- a/src/rgw/rgw_rest_metadata.cc
+++ b/src/rgw/rgw_rest_metadata.cc
@@ -161,12 +161,43 @@ void RGWOp_Metadata_Put::execute() {
   
   frame_metadata_key(s, metadata_key);
   
-  http_ret = store->meta_mgr->put(metadata_key, bl);
+  RGWMetadataHandler::sync_type_t sync_type = RGWMetadataHandler::APPLY_ALWAYS;
+
+  bool mode_exists = false;
+  string mode_string = s->info.args.get("sync-type", &mode_exists);
+  if (mode_exists) {
+    bool parsed = RGWMetadataHandler::string_to_sync_type(mode_string,
+                                                          sync_type);
+    if (!parsed) {
+      http_ret = -EINVAL;
+      return;
+    }
+  }
+
+  http_ret = store->meta_mgr->put(metadata_key, bl, sync_type, &ondisk_version);
   if (http_ret < 0) {
     dout(5) << "ERROR: can't put key: " << cpp_strerror(http_ret) << dendl;
     return;
   }
-  http_ret = 0;
+  // translate internal codes into return header
+  if (http_ret == STATUS_NO_APPLY)
+    update_status = "skipped";
+  else if (http_ret == STATUS_APPLIED)
+    update_status = "applied";
+}
+
+void RGWOp_Metadata_Put::send_response() {
+  int http_return_code = http_ret;
+  if ((http_ret == STATUS_NO_APPLY) || (http_ret == STATUS_APPLIED))
+    http_return_code = STATUS_NO_CONTENT;
+  set_req_state_err(s, http_return_code);
+  dump_errno(s);
+  stringstream ver_stream;
+  ver_stream << "ver:" << ondisk_version.ver
+	     <<",tag:" << ondisk_version.tag;
+  dump_pair(s, "RGWX_UPDATE_STATUS", update_status.c_str());
+  dump_pair(s, "RGWX_UPDATE_VERSION", ver_stream.str().c_str());
+  end_header(s);
 }
 
 void RGWOp_Metadata_Delete::execute() {
diff --git a/src/rgw/rgw_rest_metadata.h b/src/rgw/rgw_rest_metadata.h
index 59d7c5f7045..7f3cf1f2207 100644
--- a/src/rgw/rgw_rest_metadata.h
+++ b/src/rgw/rgw_rest_metadata.h
@@ -40,6 +40,8 @@ public:
 
 class RGWOp_Metadata_Put : public RGWRESTOp {
   int get_data(bufferlist& bl);
+  string update_status;
+  obj_version ondisk_version;
 public:
   RGWOp_Metadata_Put() {}
   ~RGWOp_Metadata_Put() {}
@@ -48,6 +50,7 @@ public:
     return caps.check_cap("metadata", RGW_CAP_WRITE);
   }
   void execute();
+  void send_response();
   virtual const string name() { return "set_metadata"; }
 };
 
diff --git a/src/rgw/rgw_user.cc b/src/rgw/rgw_user.cc
index 6cdcaa62935..6fcecd4a98d 100644
--- a/src/rgw/rgw_user.cc
+++ b/src/rgw/rgw_user.cc
@@ -2295,22 +2295,29 @@ public:
     return 0;
   }
 
-  int put(RGWRados *store, string& entry, RGWObjVersionTracker& objv_tracker, time_t mtime, JSONObj *obj) {
+  int put(RGWRados *store, string& entry, RGWObjVersionTracker& objv_tracker,
+          time_t mtime, JSONObj *obj, sync_type_t sync_mode) {
     RGWUserInfo info;
 
     decode_json_obj(info, obj);
 
     RGWUserInfo old_info;
-    int ret = rgw_get_user_info_by_uid(store, entry, old_info, &objv_tracker);
+    time_t orig_mtime;
+    int ret = rgw_get_user_info_by_uid(store, entry, old_info, &objv_tracker, &orig_mtime);
     if (ret < 0 && ret != -ENOENT)
       return ret;
 
+    // are we actually going to perform this put, or is it too old?
+    if (!check_versions(objv_tracker.read_version, orig_mtime,
+			objv_tracker.write_version, mtime, sync_mode)) {
+      return STATUS_NO_APPLY;
+    }
 
     ret = rgw_store_user_info(store, info, &old_info, &objv_tracker, mtime, false);
     if (ret < 0)
       return ret;
 
-    return 0;
+    return STATUS_APPLIED;
   }
 
   struct list_keys_info {
diff --git a/src/test/ObjectMap/test_store_tool/test_store_tool.cc b/src/test/ObjectMap/test_store_tool/test_store_tool.cc
index 4ab62892880..ace91220df6 100644
--- a/src/test/ObjectMap/test_store_tool/test_store_tool.cc
+++ b/src/test/ObjectMap/test_store_tool/test_store_tool.cc
@@ -19,6 +19,12 @@
 
 #include "os/LevelDBStore.h"
 
+#include "common/ceph_argparse.h"
+#include "global/global_init.h"
+#include "common/errno.h"
+#include "common/safe_io.h"
+#include "common/config.h"
+
 using namespace std;
 
 class StoreTool
@@ -98,15 +104,27 @@ void usage(const char *pname)
     << std::endl;
 }
 
-int main(int argc, char *argv[])
+int main(int argc, const char *argv[])
 {
-  if (argc < 3) {
+  vector<const char*> args;
+  argv_to_vec(argc, argv, args);
+  env_to_vec(args);
+
+  global_init(
+      NULL, args,
+      CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0);
+  common_init_finish(g_ceph_context);
+
+
+  if (args.size() < 2) {
     usage(argv[0]);
     return 1;
   }
 
-  string path(argv[1]);
-  string cmd(argv[2]);
+  string path(args[0]);
+  string cmd(args[1]);
+
+  std::cout << "path: " << path << " cmd " << cmd << std::endl;
 
   StoreTool st(path);
 
diff --git a/src/test/admin_socket.cc b/src/test/admin_socket.cc
index 0dbcb6d2f75..8f67918e644 100644
--- a/src/test/admin_socket.cc
+++ b/src/test/admin_socket.cc
@@ -64,13 +64,13 @@ TEST(AdminSocket, SendNoOp) {
   ASSERT_EQ(true, asoct.init(get_rand_socket_path()));
   AdminSocketClient client(get_rand_socket_path());
   string version;
-  ASSERT_EQ("", client.do_request("0", &version));
+  ASSERT_EQ("", client.do_request("{\"prefix\":\"0\"}", &version));
   ASSERT_EQ(CEPH_ADMIN_SOCK_VERSION, version);
   ASSERT_EQ(true, asoct.shutdown());
 }
 
 class MyTest : public AdminSocketHook {
-  bool call(std::string command, std::string args, bufferlist& result) {
+  bool call(std::string command, std::string args, std::string format, bufferlist& result) {
     result.append(command);
     result.append("|");
     result.append(args);
@@ -87,13 +87,13 @@ TEST(AdminSocket, RegisterCommand) {
   AdminSocketClient client(get_rand_socket_path());
   ASSERT_EQ(0, asoct.m_asokc->register_command("test", "test", new MyTest(), ""));
   string result;
-  ASSERT_EQ("", client.do_request("test", &result));
+  ASSERT_EQ("", client.do_request("{\"prefix\":\"test\"}", &result));
   ASSERT_EQ("test|", result);
   ASSERT_EQ(true, asoct.shutdown());
 }
 
 class MyTest2 : public AdminSocketHook {
-  bool call(std::string command, std::string args, bufferlist& result) {
+  bool call(std::string command, std::string args, std::string format, bufferlist& result) {
     result.append(command);
     result.append("|");
     result.append(args);
@@ -111,20 +111,20 @@ TEST(AdminSocket, RegisterCommandPrefixes) {
   ASSERT_EQ(0, asoct.m_asokc->register_command("test", "test", new MyTest(), ""));
   ASSERT_EQ(0, asoct.m_asokc->register_command("test command", "test command", new MyTest2(), ""));
   string result;
-  ASSERT_EQ("", client.do_request("test", &result));
+  ASSERT_EQ("", client.do_request("{\"prefix\":\"test\"}", &result));
   ASSERT_EQ("test|", result);
-  ASSERT_EQ("", client.do_request("test command", &result));
+  ASSERT_EQ("", client.do_request("{\"prefix\":\"test command\"}", &result));
   ASSERT_EQ("test command|", result);
-  ASSERT_EQ("", client.do_request("test command post", &result));
+  ASSERT_EQ("", client.do_request("{\"prefix\":\"test command post\"}", &result));
   ASSERT_EQ("test command|post", result);
-  ASSERT_EQ("", client.do_request("test command  post", &result));
+  ASSERT_EQ("", client.do_request("{\"prefix\":\"test command  post\"}", &result));
   ASSERT_EQ("test command| post", result);
-  ASSERT_EQ("", client.do_request("test this thing", &result));
+  ASSERT_EQ("", client.do_request("{\"prefix\":\"test this thing\"}", &result));
   ASSERT_EQ("test|this thing", result);
 
-  ASSERT_EQ("", client.do_request("test  command post", &result));
+  ASSERT_EQ("", client.do_request("{\"prefix\":\"test  command post\"}", &result));
   ASSERT_EQ("test| command post", result);
-  ASSERT_EQ("", client.do_request("test  this thing", &result));
+  ASSERT_EQ("", client.do_request("{\"prefix\":\"test  this thing\"}", &result));
   ASSERT_EQ("test| this thing", result);
   ASSERT_EQ(true, asoct.shutdown());
 }
diff --git a/src/test/bench/small_io_bench_fs.cc b/src/test/bench/small_io_bench_fs.cc
index 61fbacc5570..a37a7e71153 100644
--- a/src/test/bench/small_io_bench_fs.cc
+++ b/src/test/bench/small_io_bench_fs.cc
@@ -32,7 +32,10 @@ struct MorePrinting : public DetailedStatCollector::AdditionalPrinting {
   MorePrinting(CephContext *cct) : cct(cct) {}
   void operator()(std::ostream *out) {
     bufferlist bl;
-    cct->get_perfcounters_collection()->write_json_to_buf(bl, 0);
+    Formatter *f = new_formatter("json-pretty");
+    cct->get_perfcounters_collection()->dump_formatted(f, 0);
+    f->flush(bl);
+    delete f;
     bl.append('\0');
     *out << bl.c_str() << std::endl;
   }
diff --git a/src/test/cli/ceph-authtool/help.t b/src/test/cli/ceph-authtool/help.t
index 7434ec4a7c0..062c967a154 100644
--- a/src/test/cli/ceph-authtool/help.t
+++ b/src/test/cli/ceph-authtool/help.t
@@ -10,9 +10,15 @@
                                   'mount -o secret=..' argument
     -C, --create-keyring          will create a new keyring, overwriting any
                                   existing keyringfile
-    --gen-key                     will generate a new secret key for the
+    -g, --gen-key                 will generate a new secret key for the
                                   specified entityname
-    --add-key                     will add an encoded key to the keyring
+    --gen-print-key               will generate a new secret key without set it
+                                  to the keyringfile, prints the secret to stdout
+    --import-keyring              will import the content of a given keyring
+                                  into the keyringfile
+    -u, --set-uid                 sets the auid (authenticated user id) for the
+                                  specified entityname
+    -a, --add-key                 will add an encoded key to the keyring
     --cap subsystem capability    will set the capability for given subsystem
     --caps capsfile               will set all of capabilities associated with a
                                   given key, for all subsystems
diff --git a/src/test/cli/ceph-authtool/manpage.t b/src/test/cli/ceph-authtool/manpage.t
index 69e15fa51c3..a9e1408d716 100644
--- a/src/test/cli/ceph-authtool/manpage.t
+++ b/src/test/cli/ceph-authtool/manpage.t
@@ -9,9 +9,15 @@
                                   'mount -o secret=..' argument
     -C, --create-keyring          will create a new keyring, overwriting any
                                   existing keyringfile
-    --gen-key                     will generate a new secret key for the
+    -g, --gen-key                 will generate a new secret key for the
                                   specified entityname
-    --add-key                     will add an encoded key to the keyring
+    --gen-print-key               will generate a new secret key without set it
+                                  to the keyringfile, prints the secret to stdout
+    --import-keyring              will import the content of a given keyring
+                                  into the keyringfile
+    -u, --set-uid                 sets the auid (authenticated user id) for the
+                                  specified entityname
+    -a, --add-key                 will add an encoded key to the keyring
     --cap subsystem capability    will set the capability for given subsystem
     --caps capsfile               will set all of capabilities associated with a
                                   given key, for all subsystems
diff --git a/src/test/cli/ceph-authtool/simple.t b/src/test/cli/ceph-authtool/simple.t
index 69d027c1342..b86476a5ad0 100644
--- a/src/test/cli/ceph-authtool/simple.t
+++ b/src/test/cli/ceph-authtool/simple.t
@@ -9,9 +9,15 @@
                                   'mount -o secret=..' argument
     -C, --create-keyring          will create a new keyring, overwriting any
                                   existing keyringfile
-    --gen-key                     will generate a new secret key for the
+    -g, --gen-key                 will generate a new secret key for the
                                   specified entityname
-    --add-key                     will add an encoded key to the keyring
+    --gen-print-key               will generate a new secret key without set it
+                                  to the keyringfile, prints the secret to stdout
+    --import-keyring              will import the content of a given keyring
+                                  into the keyringfile
+    -u, --set-uid                 sets the auid (authenticated user id) for the
+                                  specified entityname
+    -a, --add-key                 will add an encoded key to the keyring
     --cap subsystem capability    will set the capability for given subsystem
     --caps capsfile               will set all of capabilities associated with a
                                   given key, for all subsystems
diff --git a/src/test/cls_log/test_cls_log.cc b/src/test/cls_log/test_cls_log.cc
index a8d1b3d5300..ce97025f819 100644
--- a/src/test/cls_log/test_cls_log.cc
+++ b/src/test/cls_log/test_cls_log.cc
@@ -118,14 +118,10 @@ TEST(cls_rgw, test_log_add_same_time)
   /* add chains */
   string oid = "obj";
 
-
   /* create object */
-
   ASSERT_EQ(0, ioctx.create(oid, true));
 
-
   /* generate log */
-
   utime_t start_time = ceph_clock_now(g_ceph_context);
   generate_log(ioctx, oid, 10, start_time, false);
 
@@ -206,14 +202,10 @@ TEST(cls_rgw, test_log_add_different_time)
   /* add chains */
   string oid = "obj";
 
-
   /* create object */
-
   ASSERT_EQ(0, ioctx.create(oid, true));
 
-
   /* generate log */
-
   utime_t start_time = ceph_clock_now(g_ceph_context);
   generate_log(ioctx, oid, 10, start_time, true);
 
@@ -227,7 +219,6 @@ TEST(cls_rgw, test_log_add_different_time)
   string marker;
 
   /* check list */
-
   cls_log_list(*rop, start_time, to_time, marker, 0, entries, &marker, &truncated);
 
   bufferlist obl;
@@ -258,7 +249,6 @@ TEST(cls_rgw, test_log_add_different_time)
   reset_rop(&rop);
 
   /* check list again with shifted time */
-
   utime_t next_time = get_time(start_time, 1, true);
 
   marker.clear();
@@ -289,7 +279,7 @@ TEST(cls_rgw, test_log_add_different_time)
   } while (truncated);
 
   ASSERT_EQ(10, i);
-
+  delete rop;
 }
 
 TEST(cls_rgw, test_log_trim)
@@ -305,14 +295,10 @@ TEST(cls_rgw, test_log_trim)
   /* add chains */
   string oid = "obj";
 
-
   /* create object */
-
   ASSERT_EQ(0, ioctx.create(oid, true));
 
-
   /* generate log */
-
   utime_t start_time = ceph_clock_now(g_ceph_context);
   generate_log(ioctx, oid, 10, start_time, true);
 
@@ -344,4 +330,5 @@ TEST(cls_rgw, test_log_trim)
     ASSERT_EQ(9 - i, (int)entries.size());
     ASSERT_EQ(0, (int)truncated);
   }
+  delete rop;
 }
diff --git a/src/test/cls_replica_log/test_cls_replica_log.cc b/src/test/cls_replica_log/test_cls_replica_log.cc
index eabe0b3860d..8c204caef04 100644
--- a/src/test/cls_replica_log/test_cls_replica_log.cc
+++ b/src/test/cls_replica_log/test_cls_replica_log.cc
@@ -14,34 +14,42 @@
 #include "cls/replica_log/cls_replica_log_client.h"
 #include "cls/replica_log/cls_replica_log_types.h"
 
-#define SETUP_DATA \
-  librados::Rados rados; \
-  librados::IoCtx ioctx; \
-  string pool_name = get_temp_pool_name(); \
-  ASSERT_EQ("", create_one_pool_pp(pool_name, rados)); \
-  ASSERT_EQ(0, rados.ioctx_create(pool_name.c_str(), ioctx)); \
-  string oid = "obj"; \
-  ASSERT_EQ(0, ioctx.create(oid, true));
-
-#define ADD_MARKER \
-  string entity = "tester_entity"; \
-  string marker = "tester_marker1"; \
-  utime_t time; \
-  time.set_from_double(10); \
-  list<pair<string, utime_t> > entries; \
-  entries.push_back(make_pair("tester_obj1", time)); \
-  time.set_from_double(20); \
-  cls_replica_log_progress_marker progress; \
-  cls_replica_log_prepare_marker(progress, entity, marker, time, &entries); \
-  librados::ObjectWriteOperation opw; \
-  cls_replica_log_update_bound(opw, progress); \
-  ASSERT_EQ(0, ioctx.operate(oid, &opw));
-
-TEST(cls_replica_log, test_set_get_marker)
+class cls_replica_log_Test : public ::testing::Test {
+public:
+  librados::Rados rados;
+  librados::IoCtx ioctx;
+  string pool_name;
+  string oid;
+  string entity;
+  string marker;
+  utime_t time;
+  list<pair<string, utime_t> > entries;
+  cls_replica_log_progress_marker progress;
+
+  void SetUp() {
+    pool_name = get_temp_pool_name();
+    ASSERT_EQ("", create_one_pool_pp(pool_name, rados));
+    ASSERT_EQ(0, rados.ioctx_create(pool_name.c_str(), ioctx));
+    oid = "obj";
+    ASSERT_EQ(0, ioctx.create(oid, true));
+  }
+
+  void add_marker() {
+    entity = "tester_entity";
+    marker = "tester_marker1";
+    time.set_from_double(10);
+    entries.push_back(make_pair("tester_obj1", time));
+    time.set_from_double(20);
+    cls_replica_log_prepare_marker(progress, entity, marker, time, &entries);
+    librados::ObjectWriteOperation opw;
+    cls_replica_log_update_bound(opw, progress);
+    ASSERT_EQ(0, ioctx.operate(oid, &opw));
+  }
+};
+
+TEST_F(cls_replica_log_Test, test_set_get_marker)
 {
-  SETUP_DATA
-
-  ADD_MARKER
+  add_marker();
 
   string reply_position_marker;
   utime_t reply_time;
@@ -66,11 +74,9 @@ TEST(cls_replica_log, test_set_get_marker)
   ASSERT_EQ("tester_obj1", response_item_list.front().first);
 }
 
-TEST(cls_replica_log, test_bad_update)
+TEST_F(cls_replica_log_Test, test_bad_update)
 {
-  SETUP_DATA
-
-  ADD_MARKER
+  add_marker();
 
   time.set_from_double(15);
   cls_replica_log_progress_marker bad_marker;
@@ -80,22 +86,18 @@ TEST(cls_replica_log, test_bad_update)
   ASSERT_EQ(-EINVAL, ioctx.operate(oid, &badw));
 }
 
-TEST(cls_replica_log, test_bad_delete)
+TEST_F(cls_replica_log_Test, test_bad_delete)
 {
-  SETUP_DATA
-
-  ADD_MARKER
+  add_marker();
 
   librados::ObjectWriteOperation badd;
   cls_replica_log_delete_bound(badd, entity);
   ASSERT_EQ(-ENOTEMPTY, ioctx.operate(oid, &badd));
 }
 
-TEST(cls_replica_log, test_good_delete)
+TEST_F(cls_replica_log_Test, test_good_delete)
 {
-  SETUP_DATA
-
-  ADD_MARKER
+  add_marker();
 
   librados::ObjectWriteOperation opc;
   progress.items.clear();
@@ -113,10 +115,8 @@ TEST(cls_replica_log, test_good_delete)
   ASSERT_EQ((unsigned)0, return_progress_list.size());
 }
 
-TEST(cls_replica_log, test_bad_get)
+TEST_F(cls_replica_log_Test, test_bad_get)
 {
-  SETUP_DATA
-
   string reply_position_marker;
   utime_t reply_time;
   list<cls_replica_log_progress_marker> return_progress_list;
@@ -125,11 +125,9 @@ TEST(cls_replica_log, test_bad_get)
                                        reply_time, return_progress_list));
 }
 
-TEST(cls_replica_log, test_double_delete)
+TEST_F(cls_replica_log_Test, test_double_delete)
 {
-  SETUP_DATA
-
-  ADD_MARKER
+  add_marker();
 
   librados::ObjectWriteOperation opc;
   progress.items.clear();
diff --git a/src/test/cls_statelog/test_cls_statelog.cc b/src/test/cls_statelog/test_cls_statelog.cc
index 294b528f5db..a1b4cc34efc 100644
--- a/src/test/cls_statelog/test_cls_statelog.cc
+++ b/src/test/cls_statelog/test_cls_statelog.cc
@@ -70,6 +70,7 @@ static void get_entries_by_object(librados::IoCtx& ioctx, string& oid,
   cls_statelog_list(*rop, empty_str, op_id, object, marker, 0, entries, &marker, &truncated);
   ASSERT_EQ(0, ioctx.operate(oid, rop, &obl));
   ASSERT_EQ(expected, (int)entries.size());
+  delete rop;
 }
 
 static void get_entries_by_client_id(librados::IoCtx& ioctx, string& oid,
@@ -84,6 +85,7 @@ static void get_entries_by_client_id(librados::IoCtx& ioctx, string& oid,
   cls_statelog_list(*rop, client_id, op_id, empty_str, marker, 0, entries, &marker, &truncated);
   ASSERT_EQ(0, ioctx.operate(oid, rop, &obl));
   ASSERT_EQ(expected, (int)entries.size());
+  delete rop;
 }
 
 static void get_all_entries(librados::IoCtx& ioctx, string& oid, list<cls_statelog_entry>& entries, int expected)
@@ -203,5 +205,7 @@ TEST(cls_rgw, test_statelog_basic)
   string empty_str;
   get_entries_by_client_id(ioctx, oid, entries, e.client_id, empty_str, 4);
   get_entries_by_object(ioctx, oid, entries, e.object, empty_str, 1);
+  delete op;
+  delete rop;
 }
 
diff --git a/src/test/cls_version/test_cls_version.cc b/src/test/cls_version/test_cls_version.cc
index caa0a36cd74..4c2d59500df 100644
--- a/src/test/cls_version/test_cls_version.cc
+++ b/src/test/cls_version/test_cls_version.cc
@@ -55,6 +55,7 @@ TEST(cls_rgw, test_version_inc_read)
   ASSERT_NE(0, (int)ver.tag.size());
 
   /* inc version again! */
+  delete op;
   op = new_op();
   cls_version_inc(*op);
   ASSERT_EQ(0, ioctx.operate(oid, op));
@@ -136,7 +137,6 @@ TEST(cls_rgw, test_version_inc_cond)
   /* add chains */
   string oid = "obj";
 
-
   /* create object */
 
   ASSERT_EQ(0, ioctx.create(oid, true));
@@ -160,6 +160,7 @@ TEST(cls_rgw, test_version_inc_cond)
 
 
   /* inc version again! */
+  delete op;
   op = new_op();
   cls_version_inc(*op);
   ASSERT_EQ(0, ioctx.operate(oid, op));
@@ -180,18 +181,22 @@ TEST(cls_rgw, test_version_inc_cond)
   ASSERT_EQ(0, (int)ver2.tag.compare(ver.tag));
 
   /* a bunch of conditions that should fail */
+  delete op;
   op = new_op();
   cls_version_inc(*op, cond_ver, VER_COND_EQ);
   ASSERT_EQ(-ECANCELED, ioctx.operate(oid, op));
 
+  delete op;
   op = new_op();
   cls_version_inc(*op, cond_ver, VER_COND_LT);
   ASSERT_EQ(-ECANCELED, ioctx.operate(oid, op));
 
+  delete op;
   op = new_op();
   cls_version_inc(*op, cond_ver, VER_COND_LE);
   ASSERT_EQ(-ECANCELED, ioctx.operate(oid, op));
 
+  delete op;
   op = new_op();
   cls_version_inc(*op, cond_ver, VER_COND_TAG_NE);
   ASSERT_EQ(-ECANCELED, ioctx.operate(oid, op));
@@ -201,18 +206,22 @@ TEST(cls_rgw, test_version_inc_cond)
   ASSERT_EQ(0, (int)ver2.tag.compare(ver.tag));
 
   /* a bunch of conditions that should succeed */
+  delete op;
   op = new_op();
   cls_version_inc(*op, ver2, VER_COND_EQ);
   ASSERT_EQ(0, ioctx.operate(oid, op));
 
+  delete op;
   op = new_op();
   cls_version_inc(*op, cond_ver, VER_COND_GT);
   ASSERT_EQ(0, ioctx.operate(oid, op));
 
+  delete op;
   op = new_op();
   cls_version_inc(*op, cond_ver, VER_COND_GE);
   ASSERT_EQ(0, ioctx.operate(oid, op));
 
+  delete op;
   op = new_op();
   cls_version_inc(*op, cond_ver, VER_COND_TAG_EQ);
   ASSERT_EQ(0, ioctx.operate(oid, op));
@@ -261,20 +270,24 @@ TEST(cls_rgw, test_version_inc_check)
   bufferlist bl;
   ASSERT_EQ(0, ioctx.operate(oid, rop, &bl));
 
+  delete rop;
   rop = new_rop();
   cls_version_check(*rop, cond_ver, VER_COND_GE);
   ASSERT_EQ(0, ioctx.operate(oid, rop, &bl));
 
+  delete rop;
   rop = new_rop();
   cls_version_check(*rop, cond_ver, VER_COND_LE);
   ASSERT_EQ(0, ioctx.operate(oid, rop, &bl));
 
+  delete rop;
   rop = new_rop();
   cls_version_check(*rop, cond_ver, VER_COND_TAG_EQ);
   ASSERT_EQ(0, ioctx.operate(oid, rop, &bl));
 
   obj_version ver2;
 
+  delete op;
   op = new_op();
   cls_version_inc(*op);
   ASSERT_EQ(0, ioctx.operate(oid, op));
@@ -283,16 +296,23 @@ TEST(cls_rgw, test_version_inc_check)
   ASSERT_GT((long long)ver2.ver, (long long)ver.ver);
   ASSERT_EQ(0, (int)ver2.tag.compare(ver.tag));
 
+  delete op;
+
   /* a bunch of conditions that should fail */
+  delete rop;
   rop = new_rop();
   cls_version_check(*rop, ver, VER_COND_LT);
   ASSERT_EQ(-ECANCELED, ioctx.operate(oid, rop, &bl));
 
+  delete rop;
   rop = new_rop();
   cls_version_check(*rop, cond_ver, VER_COND_LE);
   ASSERT_EQ(-ECANCELED, ioctx.operate(oid, rop, &bl));
 
+  delete rop;
   rop = new_rop();
   cls_version_check(*rop, cond_ver, VER_COND_TAG_NE);
   ASSERT_EQ(-ECANCELED, ioctx.operate(oid, rop, &bl));
+
+  delete rop;
 }
diff --git a/src/test/common/test_sharedptr_registry.cc b/src/test/common/test_sharedptr_registry.cc
new file mode 100644
index 00000000000..aec2107c9e5
--- /dev/null
+++ b/src/test/common/test_sharedptr_registry.cc
@@ -0,0 +1,289 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2013 Cloudwatt <libre.licensing@cloudwatt.com>
+ *
+ * Author: Loic Dachary <loic@dachary.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Library Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Library Public License for more details.
+ *
+ */
+
+#include <stdio.h>
+#include <signal.h>
+#include "common/Thread.h"
+#include "common/sharedptr_registry.hpp"
+#include "common/ceph_argparse.h"
+#include "global/global_init.h"
+#include <gtest/gtest.h>
+
+using namespace std::tr1;
+
+class SharedPtrRegistryTest : public SharedPtrRegistry<unsigned int, int> {
+public:
+  Mutex &get_lock() { return lock; }
+  map<unsigned int, weak_ptr<int> > &get_contents() { return contents; }
+};
+
+class SharedPtrRegistry_all : public ::testing::Test {
+public:
+
+  class Thread_wait : public Thread {
+  public:
+    SharedPtrRegistryTest &registry;
+    unsigned int key;
+    int value;
+    shared_ptr<int> ptr;
+    enum in_method_t { LOOKUP, LOOKUP_OR_CREATE } in_method;
+
+    Thread_wait(SharedPtrRegistryTest& _registry, unsigned int _key, int _value, in_method_t _in_method) : 
+      registry(_registry),
+      key(_key),
+      value(_value),
+      in_method(_in_method)
+    {
+    }
+    
+    virtual void *entry() {
+      switch(in_method) {
+      case LOOKUP_OR_CREATE:
+	if (value) 
+	  ptr = registry.lookup_or_create<int>(key, value);
+	else
+	  ptr = registry.lookup_or_create(key);
+	break;
+      case LOOKUP:
+	ptr = shared_ptr<int>(new int);
+	*ptr = value;
+	ptr = registry.lookup(key);
+	break;
+      }
+      return NULL;
+    }
+  };
+
+  static const useconds_t DELAY_MAX = 20 * 1000 * 1000;
+  static useconds_t delay;
+
+  bool wait_for(SharedPtrRegistryTest &registry, int waiting) {
+    do {
+      //
+      // the delay variable is supposed to be initialized to zero. It would be fine
+      // to usleep(0) but we take this opportunity to test the loop. It will try 
+      // again and therefore show that the logic ( increasing the delay ) actually
+      // works. 
+      //
+      if (delay > 0)
+	usleep(delay);
+      {
+	Mutex::Locker l(registry.get_lock());
+	if (registry.waiting == waiting) 
+	  break;
+      }
+      if (delay > 0)
+	cout << "delay " << delay << "us, is not long enough, try again\n";
+    } while (( delay = delay * 2 + 1) < DELAY_MAX);
+    return delay < DELAY_MAX;
+  }
+};
+
+useconds_t SharedPtrRegistry_all::delay = 0;
+
+TEST_F(SharedPtrRegistry_all, lookup_or_create) {
+  SharedPtrRegistryTest registry;
+  unsigned int key = 1;
+  int value = 2;
+  shared_ptr<int> ptr = registry.lookup_or_create(key);
+  *ptr = value;
+  ASSERT_EQ(value, *registry.lookup_or_create(key));
+}
+
+TEST_F(SharedPtrRegistry_all, wait_lookup_or_create) {
+  SharedPtrRegistryTest registry;
+
+  //
+  // simulate the following: The last reference to a shared_ptr goes
+  // out of scope and the shared_ptr object is about to be removed and
+  // marked as such. The weak_ptr stored in the registry will show
+  // that it has expired(). However, the SharedPtrRegistry::OnRemoval
+  // object has not yet been called and did not get a chance to
+  // acquire the lock. The lookup_or_create and lookup methods must
+  // detect that situation and wait until the weak_ptr is removed from
+  // the registry.
+  //
+  {
+    unsigned int key = 1;
+    {
+      shared_ptr<int> ptr(new int);
+      registry.get_contents()[key] = ptr;
+    }
+    EXPECT_FALSE(registry.get_contents()[key].lock());
+
+    Thread_wait t(registry, key, 0, Thread_wait::LOOKUP_OR_CREATE);
+    t.create();
+    ASSERT_TRUE(wait_for(registry, 1));
+    EXPECT_FALSE(t.ptr);
+    // waiting on a key does not block lookups on other keys
+    EXPECT_TRUE(registry.lookup_or_create(key + 12345));
+    registry.remove(key);
+    ASSERT_TRUE(wait_for(registry, 0));
+    EXPECT_TRUE(t.ptr);
+    t.join();
+  }
+  {
+    unsigned int key = 2;
+    int value = 3;
+    {
+      shared_ptr<int> ptr(new int);
+      registry.get_contents()[key] = ptr;
+    }
+    EXPECT_FALSE(registry.get_contents()[key].lock());
+
+    Thread_wait t(registry, key, value, Thread_wait::LOOKUP_OR_CREATE);
+    t.create();
+    ASSERT_TRUE(wait_for(registry, 1));
+    EXPECT_FALSE(t.ptr);
+    // waiting on a key does not block lookups on other keys
+    {
+      int other_value = value + 1;
+      unsigned int other_key = key + 1;
+      shared_ptr<int> ptr = registry.lookup_or_create<int>(other_key, other_value);
+      EXPECT_TRUE(ptr);
+      EXPECT_EQ(other_value, *ptr);
+    }
+    registry.remove(key);
+    ASSERT_TRUE(wait_for(registry, 0));
+    EXPECT_TRUE(t.ptr);
+    EXPECT_EQ(value, *t.ptr);
+    t.join();
+  }
+}
+
+TEST_F(SharedPtrRegistry_all, lookup) {
+  SharedPtrRegistryTest registry;
+  unsigned int key = 1;
+  int value = 2;
+  {
+    shared_ptr<int> ptr = registry.lookup_or_create(key);
+    *ptr = value;
+    ASSERT_EQ(value, *registry.lookup(key));
+  }
+  ASSERT_FALSE(registry.lookup(key));
+}
+
+TEST_F(SharedPtrRegistry_all, wait_lookup) {
+  SharedPtrRegistryTest registry;
+
+  unsigned int key = 1;
+  int value = 2;
+  {
+    shared_ptr<int> ptr(new int);
+    registry.get_contents()[key] = ptr;
+  }
+  EXPECT_FALSE(registry.get_contents()[key].lock());
+
+  Thread_wait t(registry, key, value, Thread_wait::LOOKUP);
+  t.create();
+  ASSERT_TRUE(wait_for(registry, 1));
+  EXPECT_EQ(value, *t.ptr);
+  // waiting on a key does not block lookups on other keys
+  EXPECT_FALSE(registry.lookup(key + 12345));
+  registry.remove(key);
+  ASSERT_TRUE(wait_for(registry, 0));
+  EXPECT_FALSE(t.ptr);
+  t.join();
+}
+
+TEST_F(SharedPtrRegistry_all, get_next) {
+
+  {
+    SharedPtrRegistry<unsigned int,int> registry;
+    const unsigned int key = 0;
+    pair<unsigned int, int> i;
+    EXPECT_FALSE(registry.get_next(key, &i));
+  }
+  {
+    SharedPtrRegistryTest registry;
+
+    const unsigned int key2 = 333;
+    shared_ptr<int> ptr2 = registry.lookup_or_create(key2);
+    const int value2 = *ptr2 = 400;
+
+    // entries with expired pointers are silentely ignored
+    const unsigned int key_gone = 222;
+    registry.get_contents()[key_gone] = shared_ptr<int>();
+
+    const unsigned int key1 = 111;
+    shared_ptr<int> ptr1 = registry.lookup_or_create(key1);
+    const int value1 = *ptr1 = 800;
+
+    pair<unsigned int, int> i;
+    EXPECT_TRUE(registry.get_next(i.first, &i));
+    EXPECT_EQ(key1, i.first);
+    EXPECT_EQ(value1, i.second);
+
+    EXPECT_TRUE(registry.get_next(i.first, &i));
+    EXPECT_EQ(key2, i.first);
+    EXPECT_EQ(value2, i.second);
+
+    EXPECT_FALSE(registry.get_next(i.first, &i));
+  }
+}
+
+class SharedPtrRegistry_destructor : public ::testing::Test {
+public:
+
+  typedef enum { UNDEFINED, YES, NO } DieEnum;
+  static DieEnum died;
+
+  struct TellDie {
+    TellDie() { died = NO; }
+    ~TellDie() { died = YES; }
+    
+    int value;
+  };
+
+  virtual void SetUp() {
+    died = UNDEFINED;
+  }
+};
+
+SharedPtrRegistry_destructor::DieEnum SharedPtrRegistry_destructor::died = SharedPtrRegistry_destructor::UNDEFINED;
+
+TEST_F(SharedPtrRegistry_destructor, destructor) {
+  SharedPtrRegistry<int,TellDie> registry;
+  EXPECT_EQ(UNDEFINED, died);
+  int key = 101;
+  {
+    shared_ptr<TellDie> a = registry.lookup_or_create(key);
+    EXPECT_EQ(NO, died);
+    EXPECT_TRUE(a);
+  }
+  EXPECT_EQ(YES, died);
+  EXPECT_FALSE(registry.lookup(key));
+}
+
+int main(int argc, char **argv) {
+  vector<const char*> args;
+  argv_to_vec(argc, (const char **)argv, args);
+
+  global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0);
+  common_init_finish(g_ceph_context);
+
+  ::testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
+
+// Local Variables:
+// compile-command: "cd ../.. ; make unittest_sharedptr_registry && ./unittest_sharedptr_registry # --gtest_filter=*.* --log-to-stderr=true"
+// End:
diff --git a/src/test/filestore/store_test.cc b/src/test/filestore/store_test.cc
index 87482ef702d..80c775052ec 100644
--- a/src/test/filestore/store_test.cc
+++ b/src/test/filestore/store_test.cc
@@ -829,6 +829,75 @@ TEST_F(StoreTest, ColSplitTest3) {
 }
 #endif
 
+/**
+ * This test tests adding two different groups
+ * of objects, each with 1 common prefix and 1
+ * different prefix.  We then remove half
+ * in order to verify that the merging correctly
+ * stops at the common prefix subdir.  See bug
+ * #5273 */
+TEST_F(StoreTest, TwoHash) {
+  coll_t cid("asdf");
+  int r;
+  {
+    ObjectStore::Transaction t;
+    t.create_collection(cid);
+    r = store->apply_transaction(t);
+    ASSERT_EQ(r, 0);
+  }
+  std::cout << "Making objects" << std::endl;
+  for (int i = 0; i < 360; ++i) {
+    ObjectStore::Transaction t;
+    hobject_t o;
+    if (i < 8) {
+      o.hash = (i << 16) | 0xA1;
+      t.touch(cid, o);
+    }
+    o.hash = (i << 16) | 0xB1;
+    t.touch(cid, o);
+    r = store->apply_transaction(t);
+    ASSERT_EQ(r, 0);
+  }
+  std::cout << "Removing half" << std::endl;
+  for (int i = 1; i < 8; ++i) {
+    ObjectStore::Transaction t;
+    hobject_t o;
+    o.hash = (i << 16) | 0xA1;
+    t.remove(cid, o);
+    r = store->apply_transaction(t);
+    ASSERT_EQ(r, 0);
+  }
+  std::cout << "Checking" << std::endl;
+  for (int i = 1; i < 8; ++i) {
+    ObjectStore::Transaction t;
+    hobject_t o;
+    o.hash = (i << 16) | 0xA1;
+    bool exists = store->exists(cid, o);
+    ASSERT_EQ(exists, false);
+  }
+  {
+    hobject_t o;
+    o.hash = 0xA1;
+    bool exists = store->exists(cid, o);
+    ASSERT_EQ(exists, true);
+  }
+  std::cout << "Cleanup" << std::endl;
+  for (int i = 0; i < 360; ++i) {
+    ObjectStore::Transaction t;
+    hobject_t o;
+    o.hash = (i << 16) | 0xA1;
+    t.remove(cid, o);
+    o.hash = (i << 16) | 0xB1;
+    t.remove(cid, o);
+    r = store->apply_transaction(t);
+    ASSERT_EQ(r, 0);
+  }
+  ObjectStore::Transaction t;
+  t.remove_collection(cid);
+  r = store->apply_transaction(t);
+  ASSERT_EQ(r, 0);
+}
+
 //
 // support tests for qa/workunits/filestore/filestore.sh
 //
@@ -892,7 +961,7 @@ int main(int argc, char **argv) {
   global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0);
   common_init_finish(g_ceph_context);
   g_ceph_context->_conf->set_val("osd_journal_size", "400");
-  g_ceph_context->_conf->set_val("filestore_index_retry_probability", "1");
+  g_ceph_context->_conf->set_val("filestore_index_retry_probability", "0.5");
   g_ceph_context->_conf->set_val("filestore_op_thread_timeout", "1000");
   g_ceph_context->_conf->set_val("filestore_op_thread_suicide_timeout", "10000");
   g_ceph_context->_conf->apply_changes(NULL);
diff --git a/src/test/osd/TestRados.cc b/src/test/osd/TestRados.cc
index 43530f00828..6ac661c0629 100644
--- a/src/test/osd/TestRados.cc
+++ b/src/test/osd/TestRados.cc
@@ -2,6 +2,7 @@
 #include "common/Mutex.h"
 #include "common/Cond.h"
 #include "common/errno.h"
+#include "common/version.h"
 
 #include <iostream>
 #include <sstream>
@@ -14,6 +15,7 @@
 
 #include "test/osd/RadosModel.h"
 
+
 using namespace std;
 
 class WeightedTestGenerator : public TestOpGenerator
@@ -250,6 +252,7 @@ int main(int argc, char **argv)
   if (max_stride_size < 0)
     max_stride_size = size / 5;
 
+  cout << pretty_version_to_str() << std::endl;
   cout << "Configuration:" << std::endl
        << "\tNumber of operations: " << ops << std::endl
        << "\tNumber of objects: " << objects << std::endl
diff --git a/src/test/perf_counters.cc b/src/test/perf_counters.cc
index d8f04ca7d10..d0b05f9f049 100644
--- a/src/test/perf_counters.cc
+++ b/src/test/perf_counters.cc
@@ -60,7 +60,7 @@ int main(int argc, char **argv) {
 TEST(PerfCounters, SimpleTest) {
   AdminSocketClient client(get_rand_socket_path());
   std::string message;
-  ASSERT_EQ("", client.do_request("perfcounters_dump", &message));
+  ASSERT_EQ("", client.do_request("{ \"prefix\": \"perfcounters_dump\" }", &message));
   ASSERT_EQ("{}", message);
 }
 
@@ -100,20 +100,20 @@ TEST(PerfCounters, SinglePerfCounters) {
   coll->add(fake_pf);
   AdminSocketClient client(get_rand_socket_path());
   std::string msg;
-  ASSERT_EQ("", client.do_request("perfcounters_dump", &msg));
-  ASSERT_EQ(sd("{'test_perfcounter_1':{'element1':0,"
-	    "'element2':0.000000000,'element3':{'avgcount':0,'sum':0.000000000}}}"), msg);
+  ASSERT_EQ("", client.do_request("{ \"prefix\": \"perfcounters_dump\", \"format\": \"json\" }", &msg));
+  ASSERT_EQ(sd("{\"test_perfcounter_1\":{\"element1\":0,"
+	    "\"element2\":0.000000000,\"element3\":{\"avgcount\":0,\"sum\":0.000000000}}}"), msg);
   fake_pf->inc(TEST_PERFCOUNTERS1_ELEMENT_1);
   fake_pf->tset(TEST_PERFCOUNTERS1_ELEMENT_2, utime_t(0, 500000000));
   fake_pf->tinc(TEST_PERFCOUNTERS1_ELEMENT_3, utime_t(100, 0));
-  ASSERT_EQ("", client.do_request("perfcounters_dump", &msg));
-  ASSERT_EQ(sd("{'test_perfcounter_1':{'element1':1,"
-	    "'element2':0.500000000,'element3':{'avgcount':1,'sum':100.000000000}}}"), msg);
+  ASSERT_EQ("", client.do_request("{ \"prefix\": \"perfcounters_dump\", \"format\": \"json\" }", &msg));
+  ASSERT_EQ(sd("{\"test_perfcounter_1\":{\"element1\":1,"
+	    "\"element2\":0.500000000,\"element3\":{\"avgcount\":1,\"sum\":100.000000000}}}"), msg);
   fake_pf->tinc(TEST_PERFCOUNTERS1_ELEMENT_3, utime_t());
   fake_pf->tinc(TEST_PERFCOUNTERS1_ELEMENT_3, utime_t(25,0));
-  ASSERT_EQ("", client.do_request("perfcounters_dump", &msg));
-  ASSERT_EQ(sd("{'test_perfcounter_1':{'element1':1,'element2':0.500000000,"
-	    "'element3':{'avgcount':3,'sum':125.000000000}}}"), msg);
+  ASSERT_EQ("", client.do_request("{ \"prefix\": \"perfcounters_dump\", \"format\": \"json\" }", &msg));
+  ASSERT_EQ(sd("{\"test_perfcounter_1\":{\"element1\":1,\"element2\":0.500000000,"
+	    "\"element3\":{\"avgcount\":3,\"sum\":125.000000000}}}"), msg);
 }
 
 enum {
@@ -142,24 +142,24 @@ TEST(PerfCounters, MultiplePerfCounters) {
   AdminSocketClient client(get_rand_socket_path());
   std::string msg;
 
-  ASSERT_EQ("", client.do_request("perfcounters_dump", &msg));
-  ASSERT_EQ(sd("{'test_perfcounter_1':{'element1':0,'element2':0.000000000,'element3':"
-	    "{'avgcount':0,'sum':0.000000000}},'test_perfcounter_2':{'foo':0,'bar':0.000000000}}"), msg);
+  ASSERT_EQ("", client.do_request("{ \"prefix\": \"perfcounters_dump\", \"format\": \"json\" }", &msg));
+  ASSERT_EQ(sd("{\"test_perfcounter_1\":{\"element1\":0,\"element2\":0.000000000,\"element3\":"
+	    "{\"avgcount\":0,\"sum\":0.000000000}},\"test_perfcounter_2\":{\"foo\":0,\"bar\":0.000000000}}"), msg);
 
   fake_pf1->inc(TEST_PERFCOUNTERS1_ELEMENT_1);
   fake_pf1->inc(TEST_PERFCOUNTERS1_ELEMENT_1, 5);
-  ASSERT_EQ("", client.do_request("perfcounters_dump", &msg));
-  ASSERT_EQ(sd("{'test_perfcounter_1':{'element1':6,'element2':0.000000000,'element3':"
-	    "{'avgcount':0,'sum':0.000000000}},'test_perfcounter_2':{'foo':0,'bar':0.000000000}}"), msg);
+  ASSERT_EQ("", client.do_request("{ \"prefix\": \"perfcounters_dump\", \"format\": \"json\" }", &msg));
+  ASSERT_EQ(sd("{\"test_perfcounter_1\":{\"element1\":6,\"element2\":0.000000000,\"element3\":"
+	    "{\"avgcount\":0,\"sum\":0.000000000}},\"test_perfcounter_2\":{\"foo\":0,\"bar\":0.000000000}}"), msg);
 
   coll->remove(fake_pf2);
-  ASSERT_EQ("", client.do_request("perfcounters_dump", &msg));
-  ASSERT_EQ(sd("{'test_perfcounter_1':{'element1':6,'element2':0.000000000,"
-	    "'element3':{'avgcount':0,'sum':0.000000000}}}"), msg);
-  ASSERT_EQ("", client.do_request("perfcounters_schema", &msg));
-  ASSERT_EQ(sd("{'test_perfcounter_1':{'element1':{'type':2},"
-	       "'element2':{'type':1},'element3':{'type':5}}}"), msg);
+  ASSERT_EQ("", client.do_request("{ \"prefix\": \"perfcounters_dump\", \"format\": \"json\" }", &msg));
+  ASSERT_EQ(sd("{\"test_perfcounter_1\":{\"element1\":6,\"element2\":0.000000000,"
+	    "\"element3\":{\"avgcount\":0,\"sum\":0.000000000}}}"), msg);
+  ASSERT_EQ("", client.do_request("{ \"prefix\": \"perf schema\", \"format\": \"json\" }", &msg));
+  ASSERT_EQ(sd("{\"test_perfcounter_1\":{\"element1\":{\"type\":2},"
+	       "\"element2\":{\"type\":1},\"element3\":{\"type\":5}}}"), msg);
   coll->clear();
-  ASSERT_EQ("", client.do_request("perfcounters_dump", &msg));
+  ASSERT_EQ("", client.do_request("{ \"prefix\": \"perfcounters_dump\", \"format\": \"json\" }", &msg));
   ASSERT_EQ("{}", msg);
 }
diff --git a/src/test/signals.cc b/src/test/signals.cc
index 43754988559..2c7f4d32a0a 100644
--- a/src/test/signals.cc
+++ b/src/test/signals.cc
@@ -110,3 +110,33 @@ TEST(SignalHandler, Multiple)
   unregister_async_signal_handler(SIGUSR2, testhandler);
   shutdown_async_signal_handler();
 }
+
+/*
+TEST(SignalHandler, MultipleBigFd)
+{
+  int ret;
+
+  for (int i = 0; i < 1500; i++)
+    ::open(".", O_RDONLY);
+
+  reset();
+  init_async_signal_handler();
+  register_async_signal_handler(SIGUSR1, testhandler);
+  register_async_signal_handler(SIGUSR2, testhandler);
+  ASSERT_TRUE(usr1 == false);
+  ASSERT_TRUE(usr2 == false);
+
+  ret = kill(getpid(), SIGUSR1);
+  ASSERT_EQ(ret, 0);
+  ret = kill(getpid(), SIGUSR2);
+  ASSERT_EQ(ret, 0);
+
+  sleep(1);
+  ASSERT_TRUE(usr1 == true);
+  ASSERT_TRUE(usr2 == true);
+
+  unregister_async_signal_handler(SIGUSR1, testhandler);
+  unregister_async_signal_handler(SIGUSR2, testhandler);
+  shutdown_async_signal_handler();
+}
+*/
diff --git a/src/test/system/rados_list_parallel.cc b/src/test/system/rados_list_parallel.cc
index a1c6e270265..d530c83441c 100644
--- a/src/test/system/rados_list_parallel.cc
+++ b/src/test/system/rados_list_parallel.cc
@@ -62,6 +62,7 @@ public:
     RETURN1_IF_NONZERO(rados_create(&cl, NULL));
     rados_conf_parse_argv(cl, m_argc, m_argv);
     RETURN1_IF_NONZERO(rados_conf_read_file(cl, NULL));
+    rados_conf_parse_env(cl, NULL);
     std::string log_name = SysTestSettings::inst().get_log_name(get_id_str());
     if (!log_name.empty())
       rados_conf_set(cl, "log_file", log_name.c_str());
@@ -142,6 +143,7 @@ public:
     RETURN1_IF_NONZERO(rados_create(&cl, NULL));
     rados_conf_parse_argv(cl, m_argc, m_argv);
     RETURN1_IF_NONZERO(rados_conf_read_file(cl, NULL));
+    rados_conf_parse_env(cl, NULL);
     std::string log_name = SysTestSettings::inst().get_log_name(get_id_str());
     if (!log_name.empty())
       rados_conf_set(cl, "log_file", log_name.c_str());
diff --git a/src/test/system/rados_open_pools_parallel.cc b/src/test/system/rados_open_pools_parallel.cc
index 445f2ebc485..82c712077f2 100644
--- a/src/test/system/rados_open_pools_parallel.cc
+++ b/src/test/system/rados_open_pools_parallel.cc
@@ -68,6 +68,7 @@ public:
     if (!log_name.empty())
       rados_conf_set(cl, "log_file", log_name.c_str());
     RETURN1_IF_NONZERO(rados_conf_read_file(cl, NULL));
+    rados_conf_parse_env(cl, NULL);
     RETURN1_IF_NONZERO(rados_connect(cl));
     if (m_pool_setup_sem)
       m_pool_setup_sem->wait();
diff --git a/src/test/system/st_rados_create_pool.cc b/src/test/system/st_rados_create_pool.cc
index dcae15375af..29b8d14b7e7 100644
--- a/src/test/system/st_rados_create_pool.cc
+++ b/src/test/system/st_rados_create_pool.cc
@@ -72,6 +72,7 @@ run()
   std::string log_name = SysTestSettings::inst().get_log_name(get_id_str());
   if (!log_name.empty())
     rados_conf_set(cl, "log_file", log_name.c_str());
+  rados_conf_parse_env(cl, NULL);
 
   if (m_setup_sem) {
     m_setup_sem->wait();
diff --git a/src/test/system/st_rados_delete_objs.cc b/src/test/system/st_rados_delete_objs.cc
index 38dc47a3557..b43ffdda6ae 100644
--- a/src/test/system/st_rados_delete_objs.cc
+++ b/src/test/system/st_rados_delete_objs.cc
@@ -45,6 +45,7 @@ int StRadosDeleteObjs::run()
   RETURN1_IF_NONZERO(rados_create(&cl, NULL));
   rados_conf_parse_argv(cl, m_argc, m_argv);
   RETURN1_IF_NONZERO(rados_conf_read_file(cl, NULL));
+  rados_conf_parse_env(cl, NULL);
   RETURN1_IF_NONZERO(rados_connect(cl));
   m_setup_sem->wait();
   m_setup_sem->post();
diff --git a/src/test/system/st_rados_delete_pool.cc b/src/test/system/st_rados_delete_pool.cc
index d954bf46c23..de553e98e13 100644
--- a/src/test/system/st_rados_delete_pool.cc
+++ b/src/test/system/st_rados_delete_pool.cc
@@ -41,6 +41,7 @@ int StRadosDeletePool::run()
   RETURN1_IF_NONZERO(rados_create(&cl, NULL));
   rados_conf_parse_argv(cl, m_argc, m_argv);
   RETURN1_IF_NONZERO(rados_conf_read_file(cl, NULL));
+  rados_conf_parse_env(cl, NULL);
   RETURN1_IF_NONZERO(rados_connect(cl));
   m_pool_setup_sem->wait();
   m_pool_setup_sem->post();
diff --git a/src/test/system/st_rados_list_objects.cc b/src/test/system/st_rados_list_objects.cc
index bb153affeb8..be6ead64987 100644
--- a/src/test/system/st_rados_list_objects.cc
+++ b/src/test/system/st_rados_list_objects.cc
@@ -56,6 +56,7 @@ run()
   RETURN1_IF_NONZERO(rados_create(&cl, NULL));
   rados_conf_parse_argv(cl, m_argc, m_argv);
   RETURN1_IF_NONZERO(rados_conf_read_file(cl, NULL));
+  rados_conf_parse_env(cl, NULL);
   RETURN1_IF_NONZERO(rados_connect(cl));
   m_pool_setup_sem->wait();
   m_pool_setup_sem->post();
diff --git a/src/test/system/st_rados_notify.cc b/src/test/system/st_rados_notify.cc
index 10e3e4bfbc3..e74711cb641 100644
--- a/src/test/system/st_rados_notify.cc
+++ b/src/test/system/st_rados_notify.cc
@@ -44,6 +44,8 @@ int StRadosNotify::run()
   RETURN1_IF_NONZERO(rados_create(&cl, NULL));
   rados_conf_parse_argv(cl, m_argc, m_argv);
   RETURN1_IF_NONZERO(rados_conf_read_file(cl, NULL));
+  rados_conf_parse_env(cl, NULL);
+
   if (m_setup_sem) {
     m_setup_sem->wait();
     m_setup_sem->post();
diff --git a/src/test/system/st_rados_watch.cc b/src/test/system/st_rados_watch.cc
index 991dde8ea77..696b0867e88 100644
--- a/src/test/system/st_rados_watch.cc
+++ b/src/test/system/st_rados_watch.cc
@@ -52,6 +52,8 @@ run()
   RETURN1_IF_NONZERO(rados_create(&cl, NULL));
   rados_conf_parse_argv(cl, m_argc, m_argv);
   RETURN1_IF_NONZERO(rados_conf_read_file(cl, NULL));
+  rados_conf_parse_env(cl, NULL);
+
   if (m_setup_sem) {
     m_setup_sem->wait();
     m_setup_sem->post();
diff --git a/src/test/test_rgw_admin_log.cc b/src/test/test_rgw_admin_log.cc
index 67ba0b12d96..f49a107d2f6 100644
--- a/src/test/test_rgw_admin_log.cc
+++ b/src/test/test_rgw_admin_log.cc
@@ -163,13 +163,13 @@ void test_helper::set_response(char *r){
 }
 
 size_t write_header(void *ptr, size_t size, size_t nmemb, void *ud){
-  test_helper *h = (test_helper *)ud;
+  test_helper *h = static_cast<test_helper *>(ud);
   h->set_response((char *)ptr);
   return size*nmemb;
 }
 
 size_t write_data(void *ptr, size_t size, size_t nmemb, void *ud){
-  test_helper *h = (test_helper *)ud;
+  test_helper *h = static_cast<test_helper *>(ud);
   h->set_response_data((char *)ptr, size*nmemb);
   return size*nmemb;
 }
@@ -405,7 +405,6 @@ int user_create(string& uid, string& display_name, bool set_creds = true) {
 
 int user_info(string& uid, string& display_name, RGWUserInfo& uinfo) {
   stringstream ss;
-  string creds;
   ss << "-c " << g_test->get_ceph_conf_path() << " user info --uid=" << uid
     << " --display-name=" << display_name;
 
@@ -426,7 +425,6 @@ int user_info(string& uid, string& display_name, RGWUserInfo& uinfo) {
 
 int user_rm(string& uid, string& display_name) {
   stringstream ss;
-  string creds;
   ss << "-c " << g_test->get_ceph_conf_path() << 
     " metadata rm --metadata-key=user:" << uid;
 
diff --git a/src/test/test_rgw_admin_meta.cc b/src/test/test_rgw_admin_meta.cc
index 2882891e411..fb9cf863f06 100644
--- a/src/test/test_rgw_admin_meta.cc
+++ b/src/test/test_rgw_admin_meta.cc
@@ -157,13 +157,13 @@ void test_helper::set_response(char *r){
 }
 
 size_t write_header(void *ptr, size_t size, size_t nmemb, void *ud){
-  test_helper *h = (test_helper *)ud;
+  test_helper *h = static_cast<test_helper *>(ud);
   h->set_response((char *)ptr);
   return size*nmemb;
 }
 
 size_t write_data(void *ptr, size_t size, size_t nmemb, void *ud){
-  test_helper *h = (test_helper *)ud;
+  test_helper *h = static_cast<test_helper *>(ud);
   h->set_response_data((char *)ptr, size*nmemb);
   return size*nmemb;
 }
@@ -399,7 +399,6 @@ int user_create(string& uid, string& display_name, bool set_creds = true) {
 
 int user_info(string& uid, string& display_name, RGWUserInfo& uinfo) {
   stringstream ss;
-  string creds;
   ss << "-c " << g_test->get_ceph_conf_path() << " user info --uid=" << uid
     << " --display-name=" << display_name;
 
@@ -420,7 +419,6 @@ int user_info(string& uid, string& display_name, RGWUserInfo& uinfo) {
 
 int user_rm(string& uid, string& display_name) {
   stringstream ss;
-  string creds;
   ss << "-c " << g_test->get_ceph_conf_path() << " user rm --uid=" << uid
     << " --display-name=" << display_name;
 
diff --git a/src/test/test_rgw_admin_opstate.cc b/src/test/test_rgw_admin_opstate.cc
index 1cd39890d48..3f173d12932 100644
--- a/src/test/test_rgw_admin_opstate.cc
+++ b/src/test/test_rgw_admin_opstate.cc
@@ -160,13 +160,13 @@ void test_helper::set_response(char *r){
 }
 
 size_t write_header(void *ptr, size_t size, size_t nmemb, void *ud){
-  test_helper *h = (test_helper *)ud;
+  test_helper *h = static_cast<test_helper *>(ud);
   h->set_response((char *)ptr);
   return size*nmemb;
 }
 
 size_t write_data(void *ptr, size_t size, size_t nmemb, void *ud){
-  test_helper *h = (test_helper *)ud;
+  test_helper *h = static_cast<test_helper *>(ud);
   h->set_response_data((char *)ptr, size*nmemb);
   return size*nmemb;
 }
@@ -403,7 +403,6 @@ int user_create(string& uid, string& display_name, bool set_creds = true) {
 
 int user_info(string& uid, string& display_name, RGWUserInfo& uinfo) {
   stringstream ss;
-  string creds;
   ss << "-c " << g_test->get_ceph_conf_path() << " user info --uid=" << uid
     << " --display-name=" << display_name;
 
@@ -424,7 +423,6 @@ int user_info(string& uid, string& display_name, RGWUserInfo& uinfo) {
 
 int user_rm(string& uid, string& display_name) {
   stringstream ss;
-  string creds;
   ss << "-c " << g_test->get_ceph_conf_path() << 
     " metadata rm --metadata-key=user:" << uid;
 
diff --git a/src/upstart/ceph-create-keys.conf b/src/upstart/ceph-create-keys.conf
index de215d98ff3..7c79e692a86 100644
--- a/src/upstart/ceph-create-keys.conf
+++ b/src/upstart/ceph-create-keys.conf
@@ -1,6 +1,7 @@
 description "Create Ceph client.admin key when possible"
 
 start on started ceph-mon
+stop on stopping ceph-mon
 
 task
 
diff --git a/src/upstart/ceph-osd.conf b/src/upstart/ceph-osd.conf
index e26bbc790b9..c0bf4d68c1c 100644
--- a/src/upstart/ceph-osd.conf
+++ b/src/upstart/ceph-osd.conf
@@ -6,7 +6,7 @@ stop on runlevel [!2345] or stopping ceph-osd-all
 respawn
 respawn limit 5 30
 
-limit nofile 16384 16384
+limit nofile 32768 32768
 
 pre-start script
     set -e