60 files changed, 1695 insertions, 234 deletions
diff --git a/PendingReleaseNotes b/PendingReleaseNotes index a9880942b5a..ec184746c58 100644 --- a/PendingReleaseNotes +++ b/PendingReleaseNotes @@ -19,3 +19,15 @@ v0.67 commandline tool. ceph_rest_api.py can be used as a WSGI application for deployment in a more-capable web server. See ceph-rest-api.8 for more. + +* The radosgw caps were inconsistently documented to be either 'mon = + allow r' or 'mon = allow rw'. The 'mon = allow rw' is required for + radosgw to create its own pools. All documentation has been updated + accordingly. + +* rgw copy object operation may return extra progress info during the + operation. At this point it will only happen when doing cross zone + copy operations. The S3 response will now return extra <Progress> + field under the <CopyResult> container. The Swift response will + now send the progress as a json array. + diff --git a/configure.ac b/configure.ac index 09fd81d722d..415da311712 100644 --- a/configure.ac +++ b/configure.ac @@ -250,7 +250,10 @@ AS_IF([test "x$with_fuse" != xno], [Define if you have fuse]) HAVE_LIBFUSE=1 # look for fuse_getgroups and define FUSE_GETGROUPS if found + LIBS_saved="$LIBS" + LIBS="$LIBS -lfuse" AC_CHECK_FUNCS([fuse_getgroups]) + LIBS="$LIBS_saved" ], [AC_MSG_FAILURE( [no FUSE found (use --without-fuse to disable)])])]) diff --git a/doc/changelog/v0.61.5.txt b/doc/changelog/v0.61.5.txt new file mode 100644 index 00000000000..9d99f910ca4 --- /dev/null +++ b/doc/changelog/v0.61.5.txt @@ -0,0 +1,1199 @@ +commit 8ee10dc4bb73bdd918873f29c70eedc3c7ef1979 +Author: Gary Lowell <gary.lowell@inktank.com> +Date: Wed Jul 17 16:39:08 2013 -0700 + + v0.61.5 + +commit 39bffac6b6c898882d03de392f7f2218933d942b +Author: Sage Weil <sage@inktank.com> +Date: Tue Jul 16 13:14:50 2013 -0700 + + ceph-disk: rely on /dev/disk/by-partuuid instead of special-casing journal symlinks + + This was necessary when ceph-disk-udev didn't create the by-partuuid (and + other) symlinks for us, but now it is fragile and error-prone. (It also + appears to be broken on a certain customer RHEL VM.) See + d7f7d613512fe39ec883e11d201793c75ee05db1. + + Instead, just use the by-partuuid symlinks that we spent all that ugly + effort generating. + + Backport: cuttlefish + Signed-off-by: Sage Weil <sage@inktank.com> + Reviewed-by: Dan Mick <dan.mick@inktank.com> + (cherry picked from commit 64379e701b3ed862c05f156539506d3382f77aa8) + +commit 363d54288254b5e2311cd28fce5988d68cfd5773 +Author: Joao Eduardo Luis <joao.luis@inktank.com> +Date: Tue Jul 16 16:49:48 2013 +0100 + + mon: Monitor: StoreConverter: clearer debug message on 'needs_conversion()' + + The previous debug message outputted the function's name, as often our + functions do. This was however a source of bewilderment, as users would + see those in logs and think their stores would need conversion. Changing + this message is trivial enough and it will make ceph users happier log + readers. + + Backport: cuttlefish + Signed-off-by: Joao Eduardo Luis <joao.luis@inktank.com> + Reviewed-by: Sage Weil <sage@inktank.com> + (cherry picked from commit ad1392f68170b391d11df0ce5523c2d1fb57f60e) + +commit 0ea89760def73f76d8100889eca3c25b0a6eb772 +Author: Joao Eduardo Luis <joao.luis@inktank.com> +Date: Tue Jul 16 16:45:39 2013 +0100 + + mon: Monitor: do not reopen MonitorDBStore during conversion + + We already open the store on ceph_mon.cc, before we start the conversion. 
+ Given we are unable to reproduce this every time a conversion is triggered, + we are led to believe that this causes a race in leveldb that will lead + to 'store.db/LOCK' being locked upon the open this patch removes. + + Regardless, reopening the db here is pointless as we already did it when + we reach Monitor::StoreConverter::convert(). + + Fixes: #5640 + Backport: cuttlefish + + Signed-off-by: Joao Eduardo Luis <joao.luis@inktank.com> + Reviewed-by: Sage Weil <sage@inktank.com> + (cherry picked from commit 036e6739a4e873863bae3d7d00f310c015dfcdb3) + +commit 595c09df9134fb0d62144fe1594914c90e567dca +Author: Sage Weil <sage@inktank.com> +Date: Sun Jul 14 15:54:29 2013 -0700 + + messages/MClientReconnect: clear data when encoding + + The MClientReconnect puts everything in the data payload portion of + the message and nothing in the front portion. That means that if the + message is resent (socket failure or something), the messenger thinks it + hasn't been encoded yet (front empty) and reencodes, which means + everything gets added (again) to the data portion. + + Decoding keep decoding until it runs out of data, so the second copy + means we decode garbage snap realms, leading to the crash in bug + + Clearing data each time around resolves the problem, although it does + mean we do the encoding work multiple times. We could alternatively + (or also) stick some data in the front portion of the payload + (ignored), but that changes the wire protocol and I would rather not + do that. + + Fixes: #4565 + Backport: cuttlefish + Signed-off-by: Sage Weil <sage@inktank.com> + Reviewed-by: Greg Farnum <greg@inktank.com> + (cherry picked from commit 314cf046b0b787ca69665e8751eab6fe7adb4037) + +commit 8c178e0d39d8d4a4820eb061f79d74f95e60199f +Author: Sage Weil <sage@inktank.com> +Date: Mon Jul 15 10:05:37 2013 -0700 + + mon: once sync full is chosen, make sure we don't change our mind + + It is possible for a sequence like: + + - probe + - first probe reply has paxos trim that indicates a full sync is + needed + - start sync + - clear store + - something happens that makes us abort and bootstrap (e.g., the + provider mon restarts + - probe + - first probe reply has older paxos trim bound and we call an election + - on election completion, we crash because we have no data. + + Non-determinism of the probe decision aside, we need to ensure that + the info we share during probe (fc, lc) is accurate, and that once we + clear the store we know we *must* do a full sync. + + This is a backport of aa60f940ec1994a61624345586dc70d261688456. + + Fixes: #5621 + Signed-off-by: Sage Weil <sage@inktank.com> + Reviewed-by: Greg Farnum <greg@inktank.com> + +commit 6af0ed9bc4cc955f8c30ad9dc6e9095599f323d0 +Author: Sage Weil <sage@inktank.com> +Date: Tue Jul 9 14:12:15 2013 -0700 + + mon: do not scrub if scrub is in progress + + This prevents an assert from unexpected scrub results from the previous + scrub on the leader. + + Backport: cuttlefish + Signed-off-by: Sage Weil <sage@inktank.com> + (cherry picked from commit 00ae543b3e32f89d906a0e934792cc5309f57696) + +commit 5642155ba5ca9b384a7af058a6538ac00c2a592d +Author: Sage Weil <sage@inktank.com> +Date: Wed Jul 10 10:06:20 2013 -0700 + + messages/MPGStats: do not set paxos version to osdmap epoch + + The PaxosServiceMessage version field is meant for client-coordinated + ordering of messages when switching between monitors (and is rarely + used). 
Do not fill it with the osdmap epoch lest it be compared to a + pgmap version, which may cause the mon to (near) indefinitely put it on + a wait queue until the pgmap version catches up. + + Backport: cuttlefish + Signed-off-by: Sage Weil <sage@inktank.com> + Reviewed-by: Greg Farnum <greg@inktank.com> + (cherry picked from commit b36338be43f43b6dd4ee87c97f2eaa23b467c386) + +commit 06c65988bb0b1d1ec626fe31e9d806a1c4e24b28 +Author: Sage Weil <sage@inktank.com> +Date: Thu Jul 11 18:43:24 2013 -0700 + + osd/OSDmap: fix OSDMap::Incremental::dump() for new pool names + + The name is always present when pools are created, but not when they are + modified. Also, a name may be present with a new_pools entry if the pool + is just renamed. Separate it out completely in the dump. + + Backport: cuttlefish, bobtail + Signed-off-by: Sage Weil <sage@inktank.com> + (cherry picked from commit 3e4a29111e89588385e63f8d92ce3d67739dd679) + +commit 658240710baaf9c661b8fbf856322907a0d394ee +Author: Sage Weil <sage@inktank.com> +Date: Mon Jul 8 10:49:28 2013 -0700 + + mon/PaxosService: prevent reads until initial service commit is done + + Do not process reads (or, by PaxosService::dispatch() implication, writes) + until we have committed the initial service state. This avoids things like + EPERM due to missing keys when we race with mon creation, triggered by + teuthology tests doing their health check after startup. + + Fixes: #5515 + Backport: cuttlefish + Signed-off-by: Sage Weil <sage@inktank.com> + Reviewed-by: Joao Eduardo Luis <joao.luis@inktank.com> + (cherry picked from commit d08b6d6df7dba06dad73bdec2c945f24afc02717) + +commit 5c3ff33771e227b3fb5cc354323846fe8db4ecc1 +Author: Sage Weil <sage@inktank.com> +Date: Fri Jun 28 12:21:58 2013 -0700 + + client: send all request put's through put_request() + + Make sure all MetaRequest reference put's go through the same path that + releases inode references, including all of the error paths. + + Signed-off-by: Sage Weil <sage@inktank.com> + (cherry picked from commit 87217e1e3cb2785b79d0dec49bd3f23a827551f5) + +commit 1df78ad73df581bc7537688ae28bda820b089a13 +Author: Sage Weil <sage@inktank.com> +Date: Fri Jun 28 11:50:11 2013 -0700 + + client: fix remaining Inode::put() caller, and make method psuedo-private + + Not sure I can make this actually private and make Client::put_inode() a + friend method (making all of Client a friend would defeat the purpose). + This works well enough, though! + + Signed-off-by: Sage Weil <sage@inktank.com> + (cherry picked from commit 9af3b86b25574e4d2cdfd43e61028cffa19bdeb1) + +commit fea024cc3dd2c6fd9ff322d1cd15e0d75c92eca5 +Author: Sage Weil <sage@inktank.com> +Date: Thu Jun 27 21:39:35 2013 -0700 + + client: use put_inode on MetaRequest inode refs + + When we drop the request inode refs, we need to use put_inode() to ensure + they get cleaned up properly (removed from inode_map, caps released, etc.). + Do this explicitly here (as we do with all other inode put() paths that + matter). + + Fixes: #5381 + Backport: cuttlefish + Signed-off-by: Sage Weil <sage@inktank.com> + (cherry picked from commit 81bee6487fb1ce9e090b030d61bda128a3cf4982) + +commit 62ae39ec8f208cb8f89e43ba844b9a20b4315c61 +Author: Sage Weil <sage@inktank.com> +Date: Mon Jul 8 15:57:48 2013 -0700 + + mon: be smarter about calculating last_epoch_clean lower bound + + We need to take PGs whose mapping has not changed in a long time into + account. 
For them, the pg state will indicate it was clean at the time of + the report, in which case we can use that as a lower-bound on their actual + latest epoch clean. If they are not currently clean (at report time), use + the last_epoch_clean value. + + Fixes: #5519 + Signed-off-by: Sage Weil <sage@inktank.com> + (cherry picked from commit cc0006deee3153e06ddd220bf8a40358ba830135) + +commit da725852190245d2f91b7b21e72baee70e4342bd +Author: Sage Weil <sage@inktank.com> +Date: Mon Jul 8 13:27:58 2013 -0700 + + osd: report pg stats to mon at least every N (=500) epochs + + The mon needs a moderately accurate last_epoch_clean value in order to trim + old osdmaps. To prevent a PG that hasn't peered or received IO in forever + from preventing this, send pg stats at some minimum frequency. This will + increase the pg stat report workload for the mon over an idle pool, but + should be no worse that a cluster that is getting actual IO and sees these + updates from normal stat updates. + + This makes the reported update a bit more aggressive/useful in that the epoch + is the last map epoch processed by this PG and not just one that is >= the + currenting interval. Note that the semantics of this field are pretty useless + at this point. + + See #5519 + + Signed-off-by: Sage Weil <sage@inktank.com> + (cherry picked from commit da81228cc73c95737f26c630e5c3eccf6ae1aaec) + +commit 757af91b2af0da6bbfeeb53551fa1ef4ef9118ea +Author: Sage Weil <sage@inktank.com> +Date: Wed Jul 10 11:32:34 2013 -0700 + + osd: fix warning + + From 653e04a79430317e275dd77a46c2b17c788b860b + + Backport: cuttlefish, bobtail + Signed-off-by: Sage Weil <sage@inktank.com> + (cherry picked from commit bc291d3fc3fc1cac838565cbe0f25f71d855a6e3) + +commit 65af2538329472d2fd078bb961863c40cdabda12 +Merge: e537699 804314b +Author: Sage Weil <sage@inktank.com> +Date: Fri Jul 12 15:21:20 2013 -0700 + + Merge remote-tracking branch 'gh/wip-mon-sync-2' into cuttlefish + + Reviewed-by: Joao Eduardo Luis <joao.luis@inktank.com> + Reviewed-by: Greg Farnum <greg@inktank.com> + +commit e537699b33f84c14f027b56372fbcb0a99bbe88d +Author: Sandon Van Ness <sandon@inktank.com> +Date: Wed Jul 10 14:55:52 2013 -0700 + + Get device-by-path by looking for it instead of assuming 3rd entry. + + On some systems (virtual machines so far) the device-by-path entry + from udevadm is not always in the same spot so instead actually + look for the right output instead of blindy assuming that its a + specific field in the output. + + Signed-off-by: Sandon Van Ness <sandon@inktank.com> + Reviewed-by: Gary Lowell <gary.lowell@inktank.com> + +commit 804314b8bfa5ec75cc9653e2928874c457395c92 +Merge: 6ad9fe1 78f2266 +Author: Sage Weil <sage@inktank.com> +Date: Wed Jul 10 11:40:37 2013 -0700 + + Merge remote-tracking branch 'gh/cuttlefish' into wip-mon-sync-2 + +commit 78f226634bd80f6678b1f74ccf785bc52fcd6b62 +Author: Sage Weil <sage@inktank.com> +Date: Wed Jul 10 11:02:08 2013 -0700 + + osd: limit number of inc osdmaps send to peers, clients + + We should not send an unbounded number of inc maps to our peers or clients. + In particular, if a peer is not contacted for a while, we may think they + have a very old map (say, 10000 epochs ago) and send thousands of inc maps + when the distribution shifts and we need to peer. + + Note that if we do not send enough maps, the peers will make do by + requesting the map from somewhere else (currently the mon). Regardless + of the source, however, we must limit the amount that we speculatively + share as it usually is not needed. 
+ + Backport: cuttlefish, bobtail + Signed-off-by: Sage Weil <sage@inktank.com> + Reviewed-by: Samuel Just <sam.just@inktank.com> + (cherry picked from commit 653e04a79430317e275dd77a46c2b17c788b860b) + +commit 54ee2dc80ed032c286546da51442340ec9991cdf +Author: Christophe Courtaut <christophe.courtaut@gmail.com> +Date: Mon Jul 1 14:57:17 2013 +0200 + + rgw: Fix return value for swift user not found + + http://tracker.ceph.com/issues/1779 fixes #1779 + + Adjust the return value from rgw_get_user_info_by_swift call + in RGW_SWIFT_Auth_Get::execute() to have the correct + return code in response. + (cherry picked from commit 4089001de1f22d6acd0b9f09996b71c716235551) + +commit 47852c263831707fff1570317a7446b0700c5962 +Author: Sage Weil <sage@inktank.com> +Date: Tue Jul 9 21:55:51 2013 -0700 + + mon/OSDMonitor: make 'osd crush rm ...' slightly more idempotent + + This is a manual backport of 18a624fd8b90d9959de51f07622cf0839e6bd9aa. + Do not return immediately if we are looking at uncommitted state.t + + Signed-off-by: Sage Weil <sage@inktank.com> + +commit bfc26c656d183fbcc90a352391e47f9f51c96052 +Author: Sage Weil <sage@inktank.com> +Date: Mon Jul 8 17:46:40 2013 -0700 + + mon/OSDMonitor: fix base case for loading full osdmap + + Right after cluster creation, first_committed is 1 and latest stashed in 0, + but we don't have the initial full map yet. Thereafter, we do (because we + write it with trim). Fixes afd6c7d8247075003e5be439ad59976c3d123218. + + Signed-off-by: Sage Weil <sage@inktank.com> + Reviewed-by: Joao Eduardo Luis <joao.luis@inktank.com> + (cherry picked from commit 43fa7aabf1f7e5deb844c1f52d451bab9e7d1006) + +commit 7fb3804fb860dcd0340dd3f7c39eec4315f8e4b6 +Author: Sage Weil <sage@inktank.com> +Date: Mon Jul 8 15:04:59 2013 -0700 + + mon: fix osdmap stash, trim to retain complete history of full maps + + The current interaction between sync and stashing full osdmaps only on + active mons means that a sync can result in an incomplete osdmap_full + history: + + - mon.c starts a full sync + - during sync, active osdmap service should_stash_full() is true and + includes a full in the txn + - mon.c sync finishes + - mon.c update_from_paxos gets "latest" stashed that it got from the + paxos txn + - mon.c does *not* walk to previous inc maps to complete it's collection + of full maps. + + To fix this, we disable the periodic/random stash of full maps by the + osdmap service. + + This introduces a new problem: we must have at least one full map (the first + one) in order for a mon that just synced to build it's full collection. + Extend the encode_trim() process to allow the osdmap service to include + the oldest full map with the trim txn. This is more complex than just + writing the full maps in the txn, but cheaper--we only write the full + map at trim time. + + This *might* be related to previous bugs where the full osdmap was + missing, or case where leveldb keys seemed to 'disappear'. + + Fixes: #5512 + Backport: cuttlefish + Signed-off-by: Sage Weil <sage@inktank.com> + Reviewed-by: Greg Farnum <greg@inktank.com> + (cherry picked from commit afd6c7d8247075003e5be439ad59976c3d123218) + +commit 24f90b832c695ef13021db66a178c18369ac356d +Author: Sage Weil <sage@inktank.com> +Date: Mon Jul 8 15:07:57 2013 -0700 + + mon: implement simple 'scrub' command + + Compare all keys within the sync'ed prefixes across members of the quorum + and compare the key counts and CRC for inconsistencies. + + Currently this is a one-shot inefficient hammer. 
We'll want to make this + work in chunks before it is usable in production environments. + + Protect with a feature bit to avoid sending MMonScrub to mons who can't + decode it. + + Signed-off-by: Sage Weil <sage@inktank.com> + Reviewed-by: Greg Farnum <greg@inktank.com> + (cherry picked from commit a9906641a1dce150203b72682da05651e4d68ff5) + + Conflicts: + + src/mon/MonCommands.h + src/mon/Monitor.cc + +commit 926f723c12428a034545c6c4ff6641e1d5e05d24 +Author: Samuel Just <sam.just@inktank.com> +Date: Wed Jul 3 11:18:33 2013 -0700 + + Elector.h: features are 64 bit + + Fixes: #5497 + Signed-off-by: Samuel Just <sam.just@inktank.com> + Reviewed-by: Sage Weil <sage@inktank.com> + Reviewed-by: Joao Luis <joao.luis@inktank.com> + (cherry picked from commit 3564e304e3f50642e4d9ff25e529d5fc60629093) + +commit c2b38291e706c9d1d4d337cee3a944f34bf66525 +Author: Samuel Just <sam.just@inktank.com> +Date: Wed Jul 3 11:18:19 2013 -0700 + + ceph_features.h: declare all features as ULL + + Otherwise, the first 32 get |'d together as ints. Then, the result + ((int)-1) is sign extended to ((long long int)-1) before being |'d + with the 1LL entries. This results in ~((uint64_t)0). + + Fixes: #5497 + Signed-off-by: Samuel Just <sam.just@inktank.com> + Reviewed-by: Sage Weil <sage@inktank.com> + Reviewed-by: Joao Luis <joao.luis@inktank.com> + (cherry picked from commit 4255b5c2fb54ae40c53284b3ab700fdfc7e61748) + +commit 95ef961d8537fc369efd0634262ffb8f288d6e9e +Author: Samuel Just <sam.just@inktank.com> +Date: Tue Jul 2 21:09:36 2013 -0700 + + Pipe: use uint64_t not unsigned when setting features + + Fixes: #5497 + Signed-off-by: Samuel Just <sam.just@inktank.com> + Reviewed-by: Sage Weil <sage@inktank.com> + Reviewed-by: Joao Luis <joao.luis@inktank.com> + (cherry picked from commit bc3e2f09f8860555d8b3b49b2eea164b4118d817) + +commit 09d258b70a28e5cea555b9d7e215fe41d6b84577 +Author: Sage Weil <sage@inktank.com> +Date: Mon Jul 8 11:24:48 2013 -0700 + + client: remove O_LAZY + + The once-upon-a-time unique O_LAZY value I chose forever ago is now + O_NOATIME, which means that some clients are choosing relaxed + consistency without meaning to. + + It is highly unlikely that a real O_LAZY will ever exist, and we can + select it in the ceph case with the ioctl or libcephfs call, so drop + any support for doing this via open(2) flags. + + Update doc/lazy_posix.txt file re: lazy io. + + Backport: cuttlefish + Signed-off-by: Sage Weil <sage@inktank.com> + Reviewed-by: Greg Farnum <greg@inktank.com> + (cherry picked from commit 94afedf02d07ad4678222aa66289a74b87768810) + +commit c3b684932bad31fc853ad556d16e1e4a9926486e +Author: Sage Weil <sage@inktank.com> +Date: Mon Jul 8 12:55:20 2013 -0700 + + osd/osd_types: fix pg_stat_t::dump for last_epoch_clean + + Backport: cuttlefish + Signed-off-by: Sage Weil <sage@inktank.com> + (cherry picked from commit 69a55445439fce0dd6a3d32ff4bf436da42f1b11) + +commit a02f2510fcc800b9f2cf2a06401a7b97d5985409 +Author: Sage Weil <sage@inktank.com> +Date: Fri Jul 5 16:03:49 2013 -0700 + + mon: remove bad assert about monmap version + + It is possible to start a sync when our newest monmap is 0. Usually we see + e0 from probe, but that isn't always published as part of the very first + paxos transaction due to the way PaxosService::_active generates it's + first initial commit. + + In any case, having e0 here is harmless. 
+ + Fixes: #5509 + Signed-off-by: Sage Weil <sage@inktank.com> + Reviewed-by: Joao Eduardo Luis <joao.luis@inktank.com> + (cherry picked from commit 85a1d6cc5d3852c94d1287b566656c5b5024fa13) + +commit 6ad9fe17a674ba65bbeb4052cb1ac47f3113e7bf +Author: Sage Weil <sage@inktank.com> +Date: Thu Jul 4 19:33:06 2013 -0700 + + mon/Paxos: fix sync restart + + If we have a sync going, and an election intervenes, the client will + try to continue by sending a new start_chunks request. In order to + ensure that we get all of the paxos commits from our original starting + point (and thus properly update the keys from which they started), + only pay attention if they *also* send their current last_committed + version. Otherwise, start them at the beginning. + + Signed-off-by: Sage Weil <sage@inktank.com> + +commit c5812b1c893305a7d20f9eaec2695c8b1691f0c9 +Author: Sage Weil <sage@inktank.com> +Date: Thu Jul 4 14:57:06 2013 -0700 + + mon: uninline _trim_enable and Paxos::trim_{enable,disable} so we can debug them + + Signed-off-by: Sage Weil <sage@inktank.com> + +commit 6fbcbd7fddf35a5be4b38e536871903bff4f9bf1 +Author: Sage Weil <sage@inktank.com> +Date: Thu Jul 4 14:55:34 2013 -0700 + + mon/Paxos: increase paxos max join drift + + A value of 10 is too aggressive for large, long-running syncs. 100 is + about 2 minutes of activity at most, which should be a more forgiving + buffer. + + Signed-off-by: Sage Weil <sage@inktank.com> + +commit f3a51fa30e5ce1656853b40d831409f195f6e4ca +Author: Sage Weil <sage@inktank.com> +Date: Thu Jul 4 14:21:04 2013 -0700 + + mon/Paxos: configure minimum paxos txns separately + + We were using paxos_max_join_drift to control the minimum number of + paxos transactions to keep around. Instead, make this explicit, and + separate from the join drift. + + Signed-off-by: Sage Weil <sage@inktank.com> + +commit 1156721f22f5f337241eef3d0276ca74fe6352d1 +Author: Sage Weil <sage@inktank.com> +Date: Thu Jul 4 17:09:07 2013 -0700 + + mon: include any new paxos commits in each sync CHUNK message + + We already take note of the paxos version when we begin the sync. As + sync progresses and there are new paxos commits/txns, include those + and update last_committed, so that when sync completes we will have + a full view of everything that happened during sync. + + Note that this does not introduce any compatibility change. This change + *only* affects the provider. The key difference is that at the end + of the sync, the provide will set version to the latest version, and + not the version from the start of the sync (as was done previously). + + Signed-off-by: Sage Weil <sage@inktank.com> + +commit 40672219a081f0dc2dd536977290ef05cfc9f097 +Author: Sage Weil <sage@inktank.com> +Date: Thu Jul 4 12:17:28 2013 -0700 + + mon/MonitorDBStore: expose get_chunk_tx() + + Allow users get the transaction unencoded. + + Signed-off-by: Sage Weil <sage@inktank.com> + +commit db2bb270e93ed44f9252d65d1d4c9b36875d0ea5 +Author: Sage Weil <sage@inktank.com> +Date: Wed Jul 3 17:15:56 2013 -0700 + + mon: enable leveldb cache by default + + 256 is not as large as the upstream 512 MB, but will help signficiantly and + be less disruptive for existing cuttlefish clusters. + + Sort-of backport of e93730b7ffa48b53c8da2f439a60cb6805facf5a. 
+ + Signed-off-by: Sage Weil <sage@inktank.com> + +commit 123f676e3ae8154ca94cb076c4c4ec5389d2a643 +Author: Sage Weil <sage@inktank.com> +Date: Wed Jul 3 16:56:06 2013 -0700 + + mon/Paxos: make 'paxos trim disabled max versions' much much larger + + 108000 is about 3 hours if paxos is going full-bore (1 proposal/second). + That ought to be pretty safe. Otherwise, we start trimming to soon and a + slow sync will just have to restart when it finishes. + + Backport: cuttlefish + Signed-off-by: Sage Weil <sage@inktank.com> + Reviewed-by: Joao Eduardo Luis <joao.luis@inktank.com> + (cherry picked from commit 71ebfe7e1abe4795b46cf00dfe1b03d1893368b0) + + Conflicts: + + src/common/config_opts.h + +commit 03393c0df9f54e4f1db60e1058ca5a7cd89f44e6 +Author: Sage Weil <sage@inktank.com> +Date: Wed Jun 26 06:01:40 2013 -0700 + + mon: do not reopen MonitorDBStore during startup + + level doesn't seem to like this when it races with an internal compaction + attempt (see below). Instead, let the store get opened by the ceph_mon + caller, and pull a bit of the logic into the caller to make the flow a + little easier to follow. + + -2> 2013-06-25 17:49:25.184490 7f4d439f8780 10 needs_conversion + -1> 2013-06-25 17:49:25.184495 7f4d4065c700 5 asok(0x13b1460) entry start + 0> 2013-06-25 17:49:25.316908 7f4d3fe5b700 -1 *** Caught signal (Segmentation fault) ** + in thread 7f4d3fe5b700 + + ceph version 0.64-667-g089cba8 (089cba8fc0e8ae8aef9a3111cba7342ecd0f8314) + 1: ceph-mon() [0x649f0a] + 2: (()+0xfcb0) [0x7f4d435dccb0] + 3: (leveldb::Table::BlockReader(void*, leveldb::ReadOptions const&, leveldb::Slice const&)+0x154) [0x806e54] + 4: ceph-mon() [0x808840] + 5: ceph-mon() [0x808b39] + 6: ceph-mon() [0x806540] + 7: (leveldb::DBImpl::DoCompactionWork(leveldb::DBImpl::CompactionState*)+0xdd) [0x7f363d] + 8: (leveldb::DBImpl::BackgroundCompaction()+0x2c0) [0x7f4210] + 9: (leveldb::DBImpl::BackgroundCall()+0x68) [0x7f4cc8] + 10: ceph-mon() [0x80b3af] + 11: (()+0x7e9a) [0x7f4d435d4e9a] + 12: (clone()+0x6d) [0x7f4d4196bccd] + NOTE: a copy of the executable, or `objdump -rdS <executable>` is needed to interpret this. + + Signed-off-by: Sage Weil <sage@inktank.com> + (cherry picked from commit ea1f316e5de21487ae034a1aa929068ba23ac525) + +commit 0143acc49bc5834836afc2c5a9d8f67030bec85f +Author: Sage Weil <sage@inktank.com> +Date: Tue Jul 2 14:43:17 2013 -0700 + + sysvinit, upstart: handle symlinks to dirs in /var/lib/ceph/* + + Match a symlink to a dir, not just dirs. This fixes the osd case of e.g., + creating an osd in /data/osd$id in which ceph-disk makes a symlink from + /var/lib/ceph/osd/ceph-$id. + + Fix proposed by Matt Thompson <matt.thompson@mandiant.com>; extended to + include the upstart users too. + + Fixes: #5490 + Signed-off-by: Sage Weil <sage@inktank.com> + Reviewed-by: Dan Mick <dan.mick@inktank.com> + (cherry picked from commit 87c98e92d1375c8bc76196bbbf06f677bef95e64) + +commit 7e878bcc8c1b51538f3c05f854a9dac74c09b116 +Author: Sage Weil <sage@inktank.com> +Date: Mon Jul 1 17:33:11 2013 -0700 + + rgw: add RGWFormatter_Plain allocation to sidestep cranky strlen() + + Valgrind complains about an invalid read when we don't pad the allocation, + and because it is inlined we can't whitelist it for valgrind. Workaround + the warning by just padding our allocations a bit. 
+ + Fixes: #5346 + Backport: cuttlefish + Signed-off-by: Sage Weil <sage@inktank.com> + (cherry picked from commit 49ff63b1750789070a8c6fef830c9526ae0f6d9f) + +commit ca61402855966210ba1598239eaf454eaad0f5f2 +Author: Yan, Zheng <zheng.z.yan@intel.com> +Date: Wed May 15 11:24:36 2013 +0800 + + mds: warn on unconnected snap realms + + When there are more than one active MDS, restarting MDS triggers + assertion "reconnected_snaprealms.empty()" quite often. If there + is no snapshot in the FS, the items left in reconnected_snaprealms + should be other MDS' mdsdir. I think it's harmless. + + If there are snapshots in the FS, the assertion probably can catch + real bugs. But at present, snapshot feature is broken, fixing it is + non-trivial. So replace the assertion with a warning. + + Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com> + (cherry picked from commit 26effc0e583b0a3dade6ec81ef26dec1c94ac8b2) + +commit e11f258831e14dc3755e09c0fd4f9bfdf79022a7 +Author: Sage Weil <sage@inktank.com> +Date: Wed Jun 26 06:53:08 2013 -0700 + + mon/PGMonitor: use post_paxos_update, not init, to refresh from osdmap + + We do two things here: + - make init an one-time unconditional init method, which is what the + health service expects/needs. + - switch PGMonitor::init to be post_paxos_update() which is called after + the other services update, which is what PGMonitor really needs. + + This is a new version of the fix originally in commit + a2fe0137946541e7b3b537698e1865fbce974ca6 (and those around it). That is, + this re-fixes a problem where osds do not see pg creates from their + subscribe due to map_pg_creates() not getting called. + + Backport: cuttlefish + Signed-off-by: Sage Weil <sage@inktank.com> + (cherry picked from commit e635c47851d185eda557e36bdc4bf3775f7b87a2) + + Conflicts: + src/mon/PGMonitor.cc + src/mon/PGMonitor.h + +commit 4d07fb014178da3c88edeb8765e1aaacb8cb8ffa +Author: Sage Weil <sage@inktank.com> +Date: Wed Jun 26 06:52:01 2013 -0700 + + mon/PaxosService: add post_paxos_update() hook + + Some services need to update internal state based on other service's + state, and thus need to be run after everyone has pulled their info out of + paxos. + + Backport: cuttlefish + Signed-off-by: Sage Weil <sage@inktank.com> + (cherry picked from commit 131686980f0a930d5de7cbce8234fead5bd438b6) + +commit 90f5c448abeb127ae5a5528a79bd7bdbc74cb497 +Author: Greg Farnum <greg@inktank.com> +Date: Thu Jun 27 14:58:14 2013 -0700 + + ceph-disk: s/else if/elif/ + + Signed-off-by: Greg Farnum <greg@inktank.com> + Reviewed-by: Joao Luis <joao.luis@inktank.com> + (cherry picked from commit bd8255a750de08c1b8ee5e9c9a0a1b9b16171462) + (cherry picked from commit 9e604ee6943fdb131978afbec51321050faddfc6) + +commit 5c4bb463dca5aa61ea5f02f7592d5a3cc82cf6f4 +Author: Yehuda Sadeh <yehuda@inktank.com> +Date: Wed Jun 26 11:28:57 2013 -0700 + + rgw: fix radosgw-admin buckets list + + Fixes: #5455 + Backport: cuttlefish + This commit fixes a regression, where radosgw-admin buckets list + operation wasn't returning any data. + + Signed-off-by: Yehuda Sadeh <yehuda@inktank.com> + Reviewed-by: Sage Weil <sage@inktank.com> + (cherry picked from commit e1f9fe58d2860fcbb18c92d3eb3946236b49a6ce) + +commit b2fb48762f32279e73feb83b220339fea31275e9 +Author: Sage Weil <sage@inktank.com> +Date: Wed Jun 19 17:27:49 2013 -0700 + + ceph-disk: use unix lock instead of lockfile class + + The lockfile class relies on file system trickery to get safe mutual + exclusion. However, the unix syscalls do this for us. 
More + importantly, the unix locks go away when the owning process dies, which + is behavior that we want here. + + Fixes: #5387 + Backport: cuttlefish + Signed-off-by: Sage Weil <sage@inktank.com> + Reviewed-by: Dan Mick <dan.mick@inktank.com> + (cherry picked from commit 2a4953b697a3464862fd3913336edfd7eede2487) + +commit 26e7a6fffde4abcb685f34247e8491c05ee2a68d +Author: Sage Weil <sage@inktank.com> +Date: Wed Jun 26 18:27:49 2013 -0700 + + ceph-disk: do not mount over an osd directly in /var/lib/ceph/osd/$cluster-$id + + If we see a 'ready' file in the target OSD dir, do not mount our device + on top of it. + + Among other things, this prevents ceph-disk activate on stray disks from + stepping on teuthology osds. + + Fixes: #5445 + Signed-off-by: Sage Weil <sage@inktank.com> + (cherry picked from commit 8a17f33b14d858235dfeaa42be1f4842dcfd66d2) + +commit ccb3dd5ad5533ca4e9b656b4e3df31025a5f2017 +Author: Yan, Zheng <zheng.z.yan@intel.com> +Date: Tue Apr 2 15:46:51 2013 +0800 + + mds: fix underwater dentry cleanup + + If the underwater dentry is a remove link, we shouldn't mark the + inode clean + + Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com> + (cherry picked from commit 81d073fecb58e2294df12b71351321e6d2e69652) + +commit 3020c5ea07a91475a7261dc2b810f5b61a1ae1f2 +Author: Sage Weil <sage@inktank.com> +Date: Mon Jun 24 18:51:07 2013 -0700 + + mon/Elector: cancel election timer if we bootstrap + + If we short-circuit and bootstrap, cancel our timer. Otherwise it will + go off some time later when we are in who knows what state. + + Backport: cuttlefish + Signed-off-by: Sage Weil <sage@inktank.com> + Reviewed-by: Joao Eduardo Luis <joao.luis@inktank.com> + (cherry picked from commit 9ae0ec83dabe37ac15e5165559debdfef7a5f91d) + +commit 305f0c50a5f0ffabc73e10bdf4590217d5d5d211 +Author: Sage Weil <sage@inktank.com> +Date: Mon Jun 24 18:12:11 2013 -0700 + + mon: cancel probe timeout on reset + + If we are probing and get (say) an election timeout that calls reset(), + cancel the timer. Otherwise, we assert later with a splat like + + 2013-06-24 01:09:33.675882 7fb9627e7700 4 mon.b@0(leader) e1 probe_timeout 0x307a520 + 2013-06-24 01:09:33.676956 7fb9627e7700 -1 mon/Monitor.cc: In function 'void Monitor::probe_timeout(int)' thread 7fb9627e7700 time 2013-06-24 01:09:43.675904 + mon/Monitor.cc: 1888: FAILED assert(is_probing() || is_synchronizing()) + + ceph version 0.64-613-g134d08a (134d08a9654f66634b893d493e4a92f38acc63cf) + 1: (Monitor::probe_timeout(int)+0x161) [0x56f5c1] + 2: (Context::complete(int)+0xa) [0x574a2a] + 3: (SafeTimer::timer_thread()+0x425) [0x7059a5] + 4: (SafeTimerThread::entry()+0xd) [0x7065dd] + 5: (()+0x7e9a) [0x7fb966f62e9a] + 6: (clone()+0x6d) [0x7fb9652f9ccd] + NOTE: a copy of the executable, or `objdump -rdS <executable>` is needed to interpret this. + + Fixes: #5438 + Backport: cuttlefish + Signed-off-by: Sage Weil <sage@inktank.com> + Reviewed-by: Joao Eduardo Luis <joao.luis@inktank.com> + (cherry picked from commit 03d3be3eaa96a8e72754c36abd6f355c68d52d59) + +commit a8f601d543168f4cdbddf674479d8de4b8dfc732 +Author: Alexandre Maragone <alexandre.marangone@inktank.com> +Date: Tue Jun 18 16:18:01 2013 -0700 + + ceph-disk: make list_partition behave with unusual device names + + When you get device names like sdaa you do not want to mistakenly conclude that + sdaa is a partition of sda. Use /sys/block/$device/$partition existence + instead. 
+ + Fixes: #5211 + Backport: cuttlefish + Signed-off-by: Alexandre Maragone <alexandre.maragone@inktank.com> + Reviewed-by: Sage Weil <sage@inktank.com> + (cherry picked from commit 8c0daafe003935881c5192e0b6b59b949269e5ae) + +commit 1c890f5cdfc596588e54fffeb016b4a5e9e2124c +Author: Sage Weil <sage@inktank.com> +Date: Mon Jun 17 20:28:24 2013 -0700 + + client: fix warning + + client/Client.cc: In member function 'virtual void Client::ms_handle_remote_reset(Connection*)': + warning: client/Client.cc:7892:9: enumeration value 'STATE_NEW' not handled in switch [-Wswitch] + warning: client/Client.cc:7892:9: enumeration value 'STATE_OPEN' not handled in switch [-Wswitch] + warning: client/Client.cc:7892:9: enumeration value 'STATE_CLOSED' not handled in switch [-Wswitch] + + Signed-off-by: Sage Weil <sage@inktank.com> + Reviewed-by: David Zafman <david.zafman@inktank.com> + (cherry picked from commit 8bd936f077530dfeb2e699164e4492b1c0973088) + +commit c3b97591fd8206825bcfe65bdb24fbc75a2a9b42 +Author: Sage Weil <sage@inktank.com> +Date: Mon Jun 24 17:58:48 2013 -0700 + + mon/AuthMonitor: ensure initial rotating keys get encoded when create_initial called 2x + + The create_initial() method may get called multiple times; make sure it + will unconditionally generate new/initial rotating keys. Move the block + up so that we can easily assert as much. + + Broken by commit cd98eb0c651d9ee62e19c2cc92eadae9bed678cd. + + Signed-off-by: Sage Weil <sage@inktank.com> + Reviewed-by: Yehuda Sadeh <yehuda@inktank.com> + (cherry picked from commit 521fdc2a4e65559b3da83283e6ca607b6e55406f) + +commit 0cc826c385edb2e327505696491d3ff1c3bfe8fd +Author: Sage Weil <sage@inktank.com> +Date: Mon Jun 24 17:42:04 2013 -0700 + + init-radosgw.sysv: remove -x debug mode + + Fixes: #5443 + Signed-off-by: Sage Weil <sage@inktank.com> + (cherry picked from commit 31d6062076fdbcd2691c07a23b381b26abc59f65) + +commit 4d57c12faceb7f591f10776c6850d98da55c667b +Author: Sage Weil <sage@inktank.com> +Date: Mon Jun 24 12:52:44 2013 -0700 + + common/pick_addresses: behave even after internal_safe_to_start_threads + + ceph-mon recently started using Preforker to working around forking issues. + As a result, internal_safe_to_start_threads got set sooner and calls to + pick_addresses() which try to set string config values now fail because + there are no config observers for them. + + Work around this by observing the change while we adjust the value. We + assume pick_addresses() callers are smart enough to realize that their + result will be reflected by cct->_conf and not magically handled elsewhere. + + Fixes: #5195, #5205 + Backport: cuttlefish + Signed-off-by: Sage Weil <sage@inktank.com> + Reviewed-by: Dan Mick <dan.mick@inktank.com> + (cherry picked from commit eb86eebe1ba42f04b46f7c3e3419b83eb6fe7f9a) + +commit e1ac7c6c3ca673d08710829aa5a3c03735710486 +Author: Sage Weil <sage@inktank.com> +Date: Thu Jun 20 15:39:23 2013 -0700 + + mon/PaxosService: allow paxos service writes while paxos is updating + + In commit f985de28f86675e974ac7842a49922a35fe24c6c I mistakenly made + is_writeable() false while paxos was updating due to a misread of + Paxos::propose_new_value() (I didn't see that it would queue). + This is problematic because it narrows the window during which each service + is writeable for no reason. + + Allow service to be writeable both when paxos is active and updating. 
+ + Signed-off-by: Sage Weil <sage@inktank.com> + (cherry picked from commit 11169693d086e67dcf168ce65ef6e13eebd1a1ab) + +commit 02b0b4a9acb439b2ee5deadc8b02492006492931 +Author: Sage Weil <sage@inktank.com> +Date: Fri Jun 7 11:41:21 2013 -0700 + + mon/PaxosService: not active during paxos UPDATING_PREVIOUS + + Treat this as an extension of the recovery process, e.g. + + RECOVERING -> ACTIVE + or + RECOVERING -> UPDATING_PREVIOUS -> ACTIVE + + and we are not active until we get to "the end" in both cases. + + Signed-off-by: Sage Weil <sage@inktank.com> + (cherry picked from commit 392a8e21f8571b410c85be2129ef62dd6fc52b54) + +commit c6d5dc4d47838c8c8f4d059b7d018dea3f9c4425 +Author: Sage Weil <sage@inktank.com> +Date: Fri Jun 7 11:40:22 2013 -0700 + + mon: simplify states + + - make states mutually exclusive (an enum) + - rename locked -> updating_previous + - set state prior to begin() to simplify things a bit + + Signed-off-by: Sage Weil <sage@inktank.com> + (cherry picked from commit ee34a219605d1943740fdae0d84cfb9020302dd6) + +commit c43b1f4dff254df96144b0b4d569cc72421a8fff +Author: Sage Weil <sage@inktank.com> +Date: Fri Jun 7 11:14:58 2013 -0700 + + mon/Paxos: not readable when LOCKED + + If we are re-proposing a previously accepted value from a previous quorum, + we should not consider it readable, because it is possible it was exposed + to clients as committed (2/3 accepted) but not recored to be committed, and + we do not want to expose old state as readable when new state was + previously readable. + + Signed-off-by: Sage Weil <sage@inktank.com> + (cherry picked from commit ec2ea86ed55e00265c2cc5ad0c94460b4c92865c) + +commit 10d41200622d76dbf276602828584e7153cb22b5 +Author: Sage Weil <sage@inktank.com> +Date: Fri Jun 7 11:07:38 2013 -0700 + + mon/Paxos: cleanup: drop unused PREPARING state bit + + This is never set when we block, and nobody looks at it. + + Signed-off-by: Sage Weil <sage@inktank.com> + (cherry picked from commit 7b7ea8e30e20704caad9a841332ecb2e39819a41) + +commit 9d7c40e3f4ea2dd969aa0264ea8a6ad74f3e678a +Author: Sage Weil <sage@inktank.com> +Date: Thu Jun 6 15:20:05 2013 -0700 + + mon/PaxosService: simplify is_writeable + + Recast this in terms of paxos check + our conditions, and make it + match wait_for_writeable(). + + Signed-off-by: Sage Weil <sage@inktank.com> + (cherry picked from commit f985de28f86675e974ac7842a49922a35fe24c6c) + +commit 35745cba8985c5f3238e3c28fd28b194fae043d9 +Author: Sage Weil <sage@inktank.com> +Date: Tue Jun 4 17:03:15 2013 -0700 + + mon/PaxosService: simplify readable check + + Recast this in terms of the paxos check and our additional conditions, + which match wait_for_readable(). + + Signed-off-by: Sage Weil <sage@inktank.com> + (cherry picked from commit 3aa61a0beb540e48bf61ceded766d6ff52c95eb2) + +commit 57c89291a48c319907fb3029746d9f5a4bd9dd61 +Author: Sage Weil <sage@inktank.com> +Date: Fri May 31 16:45:08 2013 -0700 + + mon: simplify Monitor::init_paxos() + + Signed-off-by: Sage Weil <sage@inktank.com> + (cherry picked from commit e832e76a4af04b091c806ad412bcfd0326d75a2d) + +commit fd1769cb2d61e8f2c7921a78760e8f12b28258fb +Author: Sage Weil <sage@inktank.com> +Date: Fri May 31 16:39:37 2013 -0700 + + mon/Paxos: go active *after* refreshing + + The update_from_paxos() methods occasionally like to trigger new activity. + As long as they check is_readable() and is_writeable(), they will defer + until we go active and that activity will happen in the normal callbacks. 
+ + This fixes the problem where we active but is_writeable() is still false, + triggered by PGMonitor::check_osd_map(). + + Signed-off-by: Sage Weil <sage@inktank.com> + (cherry picked from commit e68b1bd36ed285e38a558899f83cf224d3aa60ed) + +commit cf75478d027dfd377424988745230d096dae79ac +Author: Sage Weil <sage@inktank.com> +Date: Fri May 31 15:32:06 2013 -0700 + + mon: safely signal bootstrap from MonmapMonitor::update_from_paxos() + + Signed-off-by: Sage Weil <sage@inktank.com> + (cherry picked from commit dc83430124a5fd37573202a4cc0986c3c03739ef) + +commit 6ac58cd9c1f9c80c5f3cbe97e19cfcd8427db46d +Author: Sage Weil <sage@inktank.com> +Date: Sun Jun 2 16:57:11 2013 -0700 + + mon/Paxos: do paxos refresh in finish_proposal; and refactor + + Do the paxos refresh inside finish_proposal, ordered *after* the leader + assertion so that MonmapMonitor::update_from_paxos() calling bootstrap() + does not kill us. + + Also, remove unnecessary finish_queued_proposal() and move the logic inline + where the bad leader assertion is obvious. + + Signed-off-by: Sage Weil <sage@inktank.com> + (cherry picked from commit a42d7582f816b45f5d19c393fd45447555e78fdd) + +commit 054e96d96533b1c4078402e43184f13b97329905 +Author: Joao Eduardo Luis <joao.luis@inktank.com> +Date: Sun Jun 2 16:15:02 2013 -0700 + + mon/PaxosService: cache {first,last}_committed + + Refresh the in-memory values when we are told the on-disk paxos state + may have changed. + + Signed-off-by: Joao Eduardo Luis <joao.luis@inktank.com> + (cherry picked from commit 2fccb300bdf6ffd44db3462eb05115da11322ed4) + +commit 265212a7384399bf85e15e6978bc7543824c0e92 +Author: Sage Weil <sage@inktank.com> +Date: Fri May 31 14:30:48 2013 -0700 + + mon: no need to refresh from _active + + The refresh is done explicitly by the monitor, independent of the more + fragile PaxosService callbacks. + + Signed-off-by: Sage Weil <sage@inktank.com> + (cherry picked from commit d941363d6e4249e97b64faff0e573f75e918ac0c) + +commit 1d8662504299babec22c714662cefbb86a0acb8b +Author: Sage Weil <sage@inktank.com> +Date: Sun Jun 2 16:10:57 2013 -0700 + + mon: remove unnecessary update_from_paxos calls + + The refresh() will do this when the state changes; no need to + opportunistically call this method all of the time. + + Signed-off-by: Sage Weil <sage@inktank.com> + (cherry picked from commit 03014a4ecc06cde420fad0c6c2a0177ebd7b839d) + +commit 34acc5a3161b6bcda2b9f7ce18d89a8618fff1c5 +Author: Sage Weil <sage@inktank.com> +Date: Sun Jun 2 16:14:01 2013 -0700 + + mon: explicitly refresh_from_paxos() when leveldb state changes + + Instead of opportunistically calling each service's update_from_paxos(), + instead explicitly refresh all in-memory state whenever we know the + paxos state may have changed. This is simpler and less fragile. + + Signed-off-by: Sage Weil <sage@inktank.com> + (cherry picked from commit cc339c07312006e65854207523f50542d00ecf87) + +commit 4474a0cc6c009a566ecf46efadb39d80343a7c68 +Author: Sage Weil <sage@inktank.com> +Date: Sun Jun 23 09:25:55 2013 -0700 + + mon/AuthMonitor: make initial auth include rotating keys + + This closes a very narrow race during mon creation where there are no + service keys. 
+ + Fixes: #5427 + Signed-off-by: Sage Weil <sage@inktank.com> + (cherry picked from commit cd98eb0c651d9ee62e19c2cc92eadae9bed678cd) + +commit d572cf6f77418f217a5a8e37f1124dc566e24d0b +Author: Sage Weil <sage@inktank.com> +Date: Fri Jun 21 11:53:29 2013 -0700 + + mds: fix iterator invalidation for backtrace removal + + - Don't increment before we dereference! + - We need to update the iterator before we delete the item. + + This code is changed in master, so this fix is for cuttlefish only. + + Signed-off-by: Sage Weil <sage@inktank.com> + Reviewed-by: Greg Farnum <greg@inktank.com> + +commit 50957772c3582290331f69ba4a985b1cdf86834d +Author: Sage Weil <sage@inktank.com> +Date: Thu May 9 09:44:20 2013 -0700 + + osd: init test_ops_hook + + CID 1019628 (#1 of 1): Uninitialized pointer field (UNINIT_CTOR) + 2. uninit_member: Non-static class member "test_ops_hook" is not initialized in this constructor nor in any functions that it calls. + + Signed-off-by: Sage Weil <sage@inktank.com> + (cherry picked from commit e30a03210c3efb768b1653df5ae58917ef26e579) + +commit 17d2745f095e7bb640dece611d7824d370ea3b81 +Author: Sage Weil <sage@inktank.com> +Date: Thu May 9 09:45:51 2013 -0700 + + osd: initialize OSDService::next_notif_id + + CID 1019627 (#1 of 1): Uninitialized scalar field (UNINIT_CTOR) + 2. uninit_member: Non-static class member "next_notif_id" is not initialized in this constructor nor in any functions that it calls. + + Signed-off-by: Sage Weil <sage@inktank.com> + (cherry picked from commit 499edd8bfc355c2d590f5fa1ef197d1ea5680351) + +commit ffdb7236a994aa20b5f75860b9c81dac0f131f9a +Author: Sage Weil <sage@inktank.com> +Date: Thu Jun 20 09:46:42 2013 -0700 + + mon: more fix dout use in sync_requester_abort() + + Signed-off-by: Sage Weil <sage@inktank.com> + (cherry picked from commit d60534b8f59798feaeeaa17adba2a417d7777cbf) + +commit 38ddae04bb974a93f1718c509363f1afbe6b612d +Author: Sage Weil <sage@inktank.com> +Date: Mon Jun 10 11:48:25 2013 -0700 + + mon: fix raw use of *_dout in sync_requester_abort() + + Signed-off-by: Sage Weil <sage@inktank.com> + (cherry picked from commit 8a4ed58e39b287fd8667c62b45848487515bdc80) diff --git a/doc/install/rpm.rst b/doc/install/rpm.rst index d5d6bf196c2..d96628a0f95 100644 --- a/doc/install/rpm.rst +++ b/doc/install/rpm.rst @@ -16,6 +16,27 @@ release key to your system's list of trusted keys to avoid a security warning:: sudo rpm --import 'https://ceph.com/git/?p=ceph.git;a=blob_plain;f=keys/release.asc' +Install Prerequisites +===================== + +Ceph may require additional additional third party libraries. +To add the EPEL repository, execute the following:: + + su -c 'rpm -Uvh http://dl.fedoraproject.org/pub/epel/6/x86_64/epel-release-6-8.noarch.rpm' + +Some releases of Ceph require the following packages: + +- snappy +- leveldb +- gdisk +- python-argparse +- gperftools-libs + +To install these packages, execute the following:: + + sudo yum install snappy leveldb gdisk python-argparse gperftools-libs + + Add Release Packages ==================== @@ -31,13 +52,9 @@ Packages are currently built for the RHEL/CentOS6 (``el6``), Fedora 17 platforms. The repository package installs the repository details on your local system for use with ``yum`` or ``up2date``. 
-Replase the``{DISTRO}`` below with the distro codename:: - - su -c 'rpm -Uvh http://ceph.com/rpm-cuttlefish/{DISTRO}/x86_64/ceph-release-1-0.el6.noarch.rpm' - For example, for CentOS 6 or other RHEL6 derivatives (``el6``):: - su -c 'rpm -Uvh http://ceph.com/rpm-cuttlefish/el6/x86_64/ceph-release-1-0.el6.noarch.rpm' + su -c 'rpm -Uvh http://ceph.com/rpm-cuttlefish/el6/noarch/ceph-release-1-0.el6.noarch.rpm' You can download the RPMs directly from:: @@ -99,11 +116,23 @@ You can download the RPMs directly from:: http://ceph.com/rpm-testing + +Installing Ceph Deploy +====================== + +Once you have added either release or development packages to ``yum``, you +can install ``ceph-deploy``. :: + + sudo yum install ceph-deploy python-pushy + + + Installing Ceph Packages ======================== Once you have added either release or development packages to ``yum``, you -can install Ceph:: +can install Ceph packages. You can also use ``ceph-deploy`` to install Ceph +packages. :: sudo yum install ceph @@ -198,7 +227,7 @@ Installing Ceph Object Storage #. Create a user key. :: ceph-authtool -C -n client.radosgw.gateway --gen-key /etc/ceph/keyring.radosgw.gateway - ceph-authtool -n client.radosgw.gateway --cap mon 'allow r' --cap osd 'allow rwx' /etc/ceph/keyring.radosgw.gateway + ceph-authtool -n client.radosgw.gateway --cap mon 'allow rw' --cap osd 'allow rwx' /etc/ceph/keyring.radosgw.gateway ceph auth add client.radosgw.gateway --in-file=/etc/ceph/keyring.radosgw.gateway diff --git a/doc/man/8/radosgw.rst b/doc/man/8/radosgw.rst index 46511f9afe6..0fb114973f5 100644 --- a/doc/man/8/radosgw.rst +++ b/doc/man/8/radosgw.rst @@ -86,7 +86,7 @@ You will also have to generate a key for the radosgw to use for authentication with the cluster:: ceph-authtool -C -n client.radosgw.gateway --gen-key /etc/ceph/keyring.radosgw.gateway - ceph-authtool -n client.radosgw.gateway --cap mon 'allow r' --cap osd 'allow rwx' /etc/ceph/keyring.radosgw.gateway + ceph-authtool -n client.radosgw.gateway --cap mon 'allow rw' --cap osd 'allow rwx' /etc/ceph/keyring.radosgw.gateway And add the key to the auth entries:: diff --git a/doc/rados/operations/authentication.rst b/doc/rados/operations/authentication.rst index d56f6ef584a..0b71d08b0c4 100644 --- a/doc/rados/operations/authentication.rst +++ b/doc/rados/operations/authentication.rst @@ -196,7 +196,7 @@ capabilities necessary for the daemon to function, are shown below. ``radosgw`` :Location: ``$rgw_data/keyring`` -:Capabilities: ``mon 'allow r' osd 'allow rwx'`` +:Capabilities: ``mon 'allow rw' osd 'allow rwx'`` Note that the monitor keyring contains a key but no capabilities, and diff --git a/doc/radosgw/config.rst b/doc/radosgw/config.rst index d7526fdd776..615a979fb5d 100644 --- a/doc/radosgw/config.rst +++ b/doc/radosgw/config.rst @@ -164,7 +164,7 @@ Generate a key so that RADOS Gateway can identify a user name and authenticate the user with the cluster. Then, add capabilities to the key. For example:: sudo ceph-authtool /etc/ceph/keyring.radosgw.gateway -n client.radosgw.gateway --gen-key - sudo ceph-authtool -n client.radosgw.gateway --cap osd 'allow rwx' --cap mon 'allow r' /etc/ceph/keyring.radosgw.gateway + sudo ceph-authtool -n client.radosgw.gateway --cap osd 'allow rwx' --cap mon 'allow rw' /etc/ceph/keyring.radosgw.gateway Add to Ceph Keyring Entries @@ -173,7 +173,7 @@ Add to Ceph Keyring Entries Once you have created a keyring and key for RADOS GW, add it as an entry in the Ceph keyring. 
For example:: - sudo ceph -k /etc/ceph/ceph.keyring auth add client.radosgw.gateway -i /etc/ceph/keyring.radosgw.gateway + sudo ceph -k /etc/ceph/ceph.client.admin.keyring auth add client.radosgw.gateway -i /etc/ceph/keyring.radosgw.gateway Restart Services and Start the RADOS Gateway diff --git a/doc/rbd/rbd-snapshot.rst b/doc/rbd/rbd-snapshot.rst index 9b209777df5..0152258df63 100644 --- a/doc/rbd/rbd-snapshot.rst +++ b/doc/rbd/rbd-snapshot.rst @@ -287,13 +287,13 @@ Listing Children of a Snapshot To list the children of a snapshot, execute the following:: - rbd --pool {pool-name} snap children --image {image-name} --snap {snap-name} - rbd snap children {pool-name}/{image-name}@{snapshot-name} + rbd --pool {pool-name} children --image {image-name} --snap {snap-name} + rbd children {pool-name}/{image-name}@{snapshot-name} For example:: - rbd --pool rbd snap children --image my-image --snap my-snapshot - rbd snap children rbd/my-image@my-snapshot + rbd --pool rbd children --image my-image --snap my-snapshot + rbd children rbd/my-image@my-snapshot Flattening a Cloned Image diff --git a/doc/release-notes.rst b/doc/release-notes.rst index ba3b9be8363..f5e76febbac 100644 --- a/doc/release-notes.rst +++ b/doc/release-notes.rst @@ -15,7 +15,7 @@ Upgrading Notable changes ~~~~~~~~~~~~~~~ -* osd: pg log (re)writes are not vastly more efficient (faster peering) (Sam Just) +* osd: pg log (re)writes are now vastly more efficient (faster peering) (Sam Just) * osd: fixed problem with front-side heartbeats and mixed clusters (David Zafman) * mon: tuning, performance improvements * mon: simplify PaxosService vs Paxos interaction, fix readable/writeable checks @@ -195,6 +195,51 @@ Notable Changes * misc code cleanups +v0.61.5 "Cuttlefish" +-------------------- + +This release most improves stability of the monitor and fixes a few +bugs with the ceph-disk utility (used by ceph-deploy). We recommand +that all v0.61.x users upgrade. + +Upgrading +~~~~~~~~~ + +* This release fixes a 32-bit vs 64-bit arithmetic bug with the + feature bits. An unfortunate consequence of the fix is that 0.61.4 + (or earlier) ceph-mon daemons can't form a quorum with 0.61.5 (or + later) monitors. To avoid the possibility of service disruption, we + recommend you upgrade all monitors at once. 
+ +Notable Changes +~~~~~~~~~~~~~~~ + +* mon: misc sync improvements (faster, more reliable, better tuning) +* mon: enable leveldb cache by default (big performance improvement) +* mon: new scrub feature (primarily for diagnostic, testing purposes) +* mon: fix occasional leveldb assertion on startup +* mon: prevent reads until initial state is committed +* mon: improved logic for trimming old osdmaps +* mon: fix pick_addresses bug when expanding mon cluster +* mon: several small paxos fixes, improvements +* mon: fix bug osdmap trim behavior +* osd: fix several bugs with PG stat reporting +* osd: limit number of maps shared with peers (which could cause domino failures) +* rgw: fix radosgw-admin buckets list (for all buckets) +* mds: fix occasional client failure to reconnect +* mds: fix bad list traversal after unlink +* mds: fix underwater dentry cleanup (occasional crash after mds restart) +* libcephfs, ceph-fuse: fix occasional hangs on umount +* libcephfs, ceph-fuse: fix old bug with O_LAZY vs O_NOATIME confusion +* ceph-disk: more robust journal device detection on RHEL/CentOS +* ceph-disk: better, simpler locking +* ceph-disk: do not inadvertantely mount over existing osd mounts +* ceph-disk: better handling for unusual device names +* sysvinit, upstart: handle symlinks in /var/lib/ceph/* + +For more detailed information, see :download:`the complete changelog <changelog/v0.61.5.txt>`. + + v0.61.4 "Cuttlefish" -------------------- @@ -232,7 +277,7 @@ Notable Changes * ceph-fuse: fix thread creation on startup * all daemons: create /var/run/ceph directory on startup if missing -For more detailed information, see :download:`the complete changelog <changelog/v0.61.3.txt>`. +For more detailed information, see :download:`the complete changelog <changelog/v0.61.4.txt>`. v0.61.3 "Cuttlefish" diff --git a/src/ceph.in b/src/ceph.in index 6ba92c99b18..e6806786e7e 100755 --- a/src/ceph.in +++ b/src/ceph.in @@ -118,6 +118,8 @@ def parse_cmdargs(args=None, target=''): parser.add_argument('--admin-daemon', dest='admin_socket', help='submit admin-socket commands (\"help\" for help') + parser.add_argument('--admin-socket', dest='admin_socket_nope', + help='you probably mean --admin-daemon') parser.add_argument('-s', '--status', action='store_true', help='show cluster status') @@ -395,9 +397,9 @@ def find_cmd_target(childargs): right daemon. 
Returns ('osd', osdid), ('pg', pgid), or ('mon', '') """ - sig = parse_funcsig(['tell', {'name':'target','type':'CephName'}]) + sig = parse_funcsig(['tell', {'name':'target', 'type':'CephName'}]) try: - valid_dict = validate(childargs, sig, partial=True); + valid_dict = validate(childargs, sig, partial=True) if len(valid_dict) == 2: name = CephName() name.valid(valid_dict['target']) @@ -405,9 +407,9 @@ def find_cmd_target(childargs): except ArgumentError: pass - sig = parse_funcsig(['pg', {'name':'pgid','type':'CephPgid'}]) + sig = parse_funcsig(['pg', {'name':'pgid', 'type':'CephPgid'}]) try: - valid_dict = validate(childargs, sig, partial=True); + valid_dict = validate(childargs, sig, partial=True) if len(valid_dict) == 2: return 'pg', valid_dict['pgid'] except ArgumentError: @@ -489,6 +491,11 @@ def main(): global verbose verbose = parsed_args.verbose + if parsed_args.admin_socket_nope: + print >> sys.stderr, '--admin-socket is used by daemons; '\ + 'you probably mean --admin-daemon/daemon' + return 1 + # pass on --id, --name, --conf name = 'client.admin' if parsed_args.client_id: @@ -582,7 +589,7 @@ def main(): # implement -w/--watch_* # This is ugly, but Namespace() isn't quite rich enough. level = '' - for k,v in parsed_args._get_kwargs(): + for k, v in parsed_args._get_kwargs(): if k.startswith('watch') and v: if k == 'watch': level = 'info' @@ -670,8 +677,8 @@ def main(): prefix = '' suffix = '' if not parsed_args.output_file and len(targets) > 1: - prefix='{0}.{1}: '.format(*target) - suffix='\n' + prefix = '{0}.{1}: '.format(*target) + suffix = '\n' ret, outbuf, outs = json_command(cluster_handle, target=target, prefix='get_command_descriptions') @@ -733,7 +740,7 @@ def main(): if parsed_args.output_format and \ parsed_args.output_format.startswith('json') and \ not compat: - sys.stdout.write('\n'); + sys.stdout.write('\n') # if we are prettifying things, normalize newlines. sigh. if suffix != '': diff --git a/src/ceph_mds.cc b/src/ceph_mds.cc index edb48bd96d8..88b807b1b24 100644 --- a/src/ceph_mds.cc +++ b/src/ceph_mds.cc @@ -274,12 +274,6 @@ int main(int argc, const char **argv) messenger->start(); - // set up signal handlers, now that we've daemonized/forked. - init_async_signal_handler(); - register_async_signal_handler(SIGHUP, sighup_handler); - register_async_signal_handler_oneshot(SIGINT, handle_mds_signal); - register_async_signal_handler_oneshot(SIGTERM, handle_mds_signal); - // start mds mds = new MDS(g_conf->name.get_id().c_str(), messenger, &mc); @@ -291,16 +285,26 @@ int main(int argc, const char **argv) r = mds->init(shadow); else r = mds->init(); + if (r < 0) + goto shutdown; - if (r >= 0) { - messenger->wait(); - } + // set up signal handlers, now that we've daemonized/forked. + init_async_signal_handler(); + register_async_signal_handler(SIGHUP, sighup_handler); + register_async_signal_handler_oneshot(SIGINT, handle_mds_signal); + register_async_signal_handler_oneshot(SIGTERM, handle_mds_signal); + + if (g_conf->inject_early_sigterm) + kill(getpid(), SIGTERM); + + messenger->wait(); unregister_async_signal_handler(SIGHUP, sighup_handler); unregister_async_signal_handler(SIGINT, handle_mds_signal); unregister_async_signal_handler(SIGTERM, handle_mds_signal); shutdown_async_signal_handler(); + shutdown: // yuck: grab the mds lock, so we can be sure that whoever in *mds // called shutdown finishes what they were doing. 
mds->mds_lock.Lock(); @@ -313,14 +317,15 @@ int main(int argc, const char **argv) if (mds->is_stopped()) delete mds; + g_ceph_context->put(); + // cd on exit, so that gmon.out (if any) goes into a separate directory for each node. char s[20]; snprintf(s, sizeof(s), "gmon/%d", getpid()); if ((mkdir(s, 0755) == 0) && (chdir(s) == 0)) { - dout(0) << "ceph-mds: gmon.out should be in " << s << dendl; + cerr << "ceph-mds: gmon.out should be in " << s << std::endl; } - generic_dout(0) << "stopped." << dendl; return 0; } diff --git a/src/ceph_mon.cc b/src/ceph_mon.cc index 6ac22ba20e5..35ed56a7985 100644 --- a/src/ceph_mon.cc +++ b/src/ceph_mon.cc @@ -542,15 +542,18 @@ int main(int argc, const char **argv) if (g_conf->daemonize) prefork.daemonize(); + messenger->start(); + + mon->init(); + // set up signal handlers, now that we've daemonized/forked. init_async_signal_handler(); register_async_signal_handler(SIGHUP, sighup_handler); register_async_signal_handler_oneshot(SIGINT, handle_mon_signal); register_async_signal_handler_oneshot(SIGTERM, handle_mon_signal); - messenger->start(); - - mon->init(); + if (g_conf->inject_early_sigterm) + kill(getpid(), SIGTERM); messenger->wait(); diff --git a/src/ceph_osd.cc b/src/ceph_osd.cc index b485133514e..d8590bff817 100644 --- a/src/ceph_osd.cc +++ b/src/ceph_osd.cc @@ -451,12 +451,6 @@ int main(int argc, const char **argv) messenger_hb_back_server->start(); cluster_messenger->start(); - // install signal handlers - init_async_signal_handler(); - register_async_signal_handler(SIGHUP, sighup_handler); - register_async_signal_handler_oneshot(SIGINT, handle_osd_signal); - register_async_signal_handler_oneshot(SIGTERM, handle_osd_signal); - // start osd err = osd->init(); if (err < 0) { @@ -465,6 +459,15 @@ int main(int argc, const char **argv) return 1; } + // install signal handlers + init_async_signal_handler(); + register_async_signal_handler(SIGHUP, sighup_handler); + register_async_signal_handler_oneshot(SIGINT, handle_osd_signal); + register_async_signal_handler_oneshot(SIGTERM, handle_osd_signal); + + if (g_conf->inject_early_sigterm) + kill(getpid(), SIGTERM); + client_messenger->wait(); messenger_hbclient->wait(); messenger_hb_front_server->wait(); diff --git a/src/client/SyntheticClient.cc b/src/client/SyntheticClient.cc index 79171da46f1..cb211f5461b 100644 --- a/src/client/SyntheticClient.cc +++ b/src/client/SyntheticClient.cc @@ -1470,8 +1470,7 @@ int SyntheticClient::play_trace(Trace& t, string& prefix, bool metadata_only) dout(10) << "trace finished on line " << t.get_line() << dendl; // wait for safe after an object trace - safegref->finish(0); - delete safegref; + safegref->complete(0); lock.Lock(); while (!safe) { dout(10) << "waiting for safe" << dendl; diff --git a/src/client/fuse_ll.cc b/src/client/fuse_ll.cc index 83395534d0d..ce0c6de1260 100644 --- a/src/client/fuse_ll.cc +++ b/src/client/fuse_ll.cc @@ -14,6 +14,7 @@ #define FUSE_USE_VERSION 26 +#include <fuse/fuse.h> #include <fuse/fuse_lowlevel.h> #include <signal.h> #include <stdio.h> @@ -520,7 +521,7 @@ static int getgroups_cb(void *handle, uid_t uid, gid_t **sgids) return 0; } - *sgids = malloc(c*sizeof(**sgids)); + *sgids = (gid_t*)malloc(c*sizeof(**sgids)); if (!*sgids) { return -ENOMEM; } diff --git a/src/common/config_opts.h b/src/common/config_opts.h index defb71ee514..b43808e211c 100644 --- a/src/common/config_opts.h +++ b/src/common/config_opts.h @@ -123,6 +123,8 @@ OPTION(ms_inject_delay_max, OPT_DOUBLE, 1) // seconds OPTION(ms_inject_delay_probability, OPT_DOUBLE, 0) // 
range [0, 1] OPTION(ms_inject_internal_delays, OPT_DOUBLE, 0) // seconds +OPTION(inject_early_sigterm, OPT_BOOL, false) + OPTION(mon_data, OPT_STR, "/var/lib/ceph/mon/$cluster-$id") OPTION(mon_initial_members, OPT_STR, "") // list of initial cluster mon ids; if specified, need majority to form initial quorum and create new cluster OPTION(mon_sync_fs_threshold, OPT_INT, 5) // sync() when writing this many objects; 0 to disable. @@ -207,6 +209,7 @@ OPTION(paxos_trim_min, OPT_INT, 250) // number of extra proposals tolerated bef OPTION(paxos_trim_max, OPT_INT, 500) // max number of extra proposals to trim at a time OPTION(paxos_service_trim_min, OPT_INT, 250) // minimum amount of versions to trigger a trim (0 disables it) OPTION(paxos_service_trim_max, OPT_INT, 500) // maximum amount of versions to trim during a single proposal (0 disables it) +OPTION(paxos_kill_at, OPT_INT, 0) OPTION(clock_offset, OPT_DOUBLE, 0) // how much to offset the system clock in Clock.cc OPTION(auth_cluster_required, OPT_STR, "cephx") // required of mon, mds, osd daemons OPTION(auth_service_required, OPT_STR, "cephx") // required by daemons of clients @@ -675,6 +678,8 @@ OPTION(rgw_md_log_max_shards, OPT_INT, 64) // max shards for metadata log OPTION(rgw_num_zone_opstate_shards, OPT_INT, 128) // max shards for keeping inter-region copy progress info OPTION(rgw_opstate_ratelimit_sec, OPT_INT, 30) // min time between opstate updates on a single upload (0 for disabling ratelimit) OPTION(rgw_curl_wait_timeout_ms, OPT_INT, 1000) // timeout for certain curl calls +OPTION(rgw_copy_obj_progress, OPT_BOOL, true) // should dump progress during long copy operations? +OPTION(rgw_copy_obj_progress_every_bytes, OPT_INT, 1024 * 1024) // min bytes between copy progress output OPTION(rgw_data_log_window, OPT_INT, 30) // data log entries window (in seconds) OPTION(rgw_data_log_changes_size, OPT_INT, 1000) // number of in-memory entries to hold for data changes log diff --git a/src/include/Context.h b/src/include/Context.h index e31fca6a426..9ec4414a047 100644 --- a/src/include/Context.h +++ b/src/include/Context.h @@ -34,10 +34,12 @@ class Context { Context(const Context& other); const Context& operator=(const Context& other); + protected: + virtual void finish(int r) = 0; + public: Context() {} virtual ~Context() {} // we want a virtual destructor!!! 
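// A small self-contained illustration (not ceph's Context class itself) of the
// callback idiom this series converts call sites to: finish() becomes
// protected, so code outside the class has to go through complete(), which
// both runs the callback and frees it, replacing the repetitive and
// leak/double-free prone "c->finish(r); delete c;" pair seen in the hunks below.
#include <iostream>

class Callback {
protected:
  virtual void finish(int r) = 0;      // subclasses implement the actual work
public:
  virtual ~Callback() {}
  void complete(int r) { finish(r); delete this; }   // one-shot: run, then free
};

struct PrintResult : public Callback {
protected:
  void finish(int r) override { std::cout << "finished, r=" << r << std::endl; }
};

int main() {
  Callback *c = new PrintResult;
  c->complete(0);                      // never "c->finish(0); delete c;"
  return 0;
}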
- virtual void finish(int r) = 0; virtual void complete(int r) { finish(r); delete this; diff --git a/src/librbd/LibrbdWriteback.cc b/src/librbd/LibrbdWriteback.cc index a7ab0488dc6..da02a34ed32 100644 --- a/src/librbd/LibrbdWriteback.cc +++ b/src/librbd/LibrbdWriteback.cc @@ -32,8 +32,7 @@ namespace librbd { void context_cb(rados_completion_t c, void *arg) { Context *con = reinterpret_cast<Context *>(arg); - con->finish(rados_aio_get_return_value(c)); - delete con; + con->complete(rados_aio_get_return_value(c)); } /** diff --git a/src/log/Log.cc b/src/log/Log.cc index e06afbfe1e2..afeb1208002 100644 --- a/src/log/Log.cc +++ b/src/log/Log.cc @@ -42,9 +42,6 @@ Log::Log(SubsystemMap *s) { int ret; - ret = pthread_spin_init(&m_lock, PTHREAD_PROCESS_SHARED); - assert(ret == 0); - ret = pthread_mutex_init(&m_flush_mutex, NULL); assert(ret == 0); @@ -73,7 +70,6 @@ Log::~Log() if (m_fd >= 0) TEMP_FAILURE_RETRY(::close(m_fd)); - pthread_spin_destroy(&m_lock); pthread_mutex_destroy(&m_queue_mutex); pthread_mutex_destroy(&m_flush_mutex); pthread_cond_destroy(&m_cond_loggers); diff --git a/src/log/Log.h b/src/log/Log.h index f6a27dc5b37..b5e16fdde79 100644 --- a/src/log/Log.h +++ b/src/log/Log.h @@ -21,7 +21,6 @@ class Log : private Thread SubsystemMap *m_subs; - pthread_spinlock_t m_lock; pthread_mutex_t m_queue_mutex; pthread_mutex_t m_flush_mutex; pthread_cond_t m_cond_loggers; diff --git a/src/mds/AnchorClient.cc b/src/mds/AnchorClient.cc index bcc8710e43f..30cbfd34f74 100644 --- a/src/mds/AnchorClient.cc +++ b/src/mds/AnchorClient.cc @@ -51,8 +51,7 @@ void AnchorClient::handle_query_result(class MMDSTableRequest *m) for (list<_pending_lookup>::iterator q = ls.begin(); q != ls.end(); ++q) { *q->trace = trace; if (q->onfinish) { - q->onfinish->finish(0); - delete q->onfinish; + q->onfinish->complete(0); } } } diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc index 48529948955..d215d18690f 100644 --- a/src/mds/CInode.cc +++ b/src/mds/CInode.cc @@ -914,8 +914,7 @@ void CInode::_stored(version_t v, Context *fin) if (v == get_projected_version()) mark_clean(); - fin->finish(0); - delete fin; + fin->complete(0); } struct C_Inode_Fetched : public Context { @@ -964,13 +963,12 @@ void CInode::_fetched(bufferlist& bl, bufferlist& bl2, Context *fin) if (magic != CEPH_FS_ONDISK_MAGIC) { dout(0) << "on disk magic '" << magic << "' != my magic '" << CEPH_FS_ONDISK_MAGIC << "'" << dendl; - fin->finish(-EINVAL); + fin->complete(-EINVAL); } else { decode_store(p); dout(10) << "_fetched " << *this << dendl; - fin->finish(0); + fin->complete(0); } - delete fin; } void CInode::build_backtrace(int64_t pool, inode_backtrace_t& bt) diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index e592dde96ca..77d3d8b97b8 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -510,8 +510,7 @@ void MDCache::_create_system_file_finish(Mutation *mut, CDentry *dn, version_t d mut->cleanup(); delete mut; - fin->finish(0); - delete fin; + fin->complete(0); //if (dir && MDS_INO_IS_MDSDIR(in->ino())) //migrator->export_dir(dir, (int)in->ino() - MDS_INO_MDSDIR_OFFSET); @@ -3093,8 +3092,7 @@ void MDCache::handle_resolve_ack(MMDSResolveAck *ack) if (mdr->more()->slave_commit) { Context *fin = mdr->more()->slave_commit; mdr->more()->slave_commit = 0; - fin->finish(-1); - delete fin; + fin->complete(-1); } else { if (mdr->slave_request) mdr->aborted = true; @@ -7675,8 +7673,7 @@ public: mdcache(mdc), ino(i), want_xlocked(wx), onfinish(c) {} void finish(int r) { if (mdcache->get_inode(ino)) { - onfinish->finish(0); - delete onfinish; 
+ onfinish->complete(0); } else mdcache->open_remote_ino(ino, onfinish, want_xlocked); } @@ -7703,8 +7700,7 @@ public: if (r == 0) mdcache->open_remote_ino_2(ino, anchortrace, want_xlocked, hadino, hadv, onfinish); else { - onfinish->finish(r); - delete onfinish; + onfinish->complete(r); } } }; @@ -7753,8 +7749,7 @@ void MDCache::open_remote_ino_2(inodeno_t ino, vector<Anchor>& anchortrace, bool if (in->ino() == ino) { // success dout(10) << "open_remote_ino_2 have " << *in << dendl; - onfinish->finish(0); - delete onfinish; + onfinish->complete(0); return; } @@ -7795,8 +7790,7 @@ void MDCache::open_remote_ino_2(inodeno_t ino, vector<Anchor>& anchortrace, bool dout(10) << "expected ino " << anchortrace[i].ino << " in complete dir " << *dir << ", got same anchor " << anchortrace[i] << " 2x in a row" << dendl; - onfinish->finish(-ENOENT); - delete onfinish; + onfinish->complete(-ENOENT); } else { // hrm. requery anchor table. dout(10) << "expected ino " << anchortrace[i].ino @@ -8408,8 +8402,7 @@ void MDCache::_do_find_ino_peer(find_ino_peer_info_t& fip) dout(10) << "_do_find_ino_peer waiting for more peers to be active" << dendl; } else { dout(10) << "_do_find_ino_peer failed on " << fip.ino << dendl; - fip.fin->finish(-ESTALE); - delete fip.fin; + fip.fin->complete(-ESTALE); find_ino_peer.erase(fip.tid); } } else { @@ -8521,8 +8514,7 @@ void MDCache::_find_ino_dir(inodeno_t ino, Context *fin, bufferlist& bl, int r) { dout(10) << "_find_ino_dir " << ino << " got " << r << " " << bl.length() << " bytes" << dendl; if (r < 0) { - fin->finish(r); - delete fin; + fin->complete(r); return; } @@ -8539,8 +8531,7 @@ void MDCache::_find_ino_dir(inodeno_t ino, Context *fin, bufferlist& bl, int r) return; delete c; // path_traverse doesn't clean it up for us for r <= 0 - fin->finish(r); - delete fin; + fin->complete(r); } @@ -8619,8 +8610,7 @@ void MDCache::request_finish(MDRequest *mdr) if (mdr->more()->slave_commit) { Context *fin = mdr->more()->slave_commit; mdr->more()->slave_commit = 0; - fin->finish(0); // this must re-call request_finish. - delete fin; + fin->complete(0); // this must re-call request_finish. return; } diff --git a/src/mds/MDLog.cc b/src/mds/MDLog.cc index b293c4cc10a..3dfc00fc221 100644 --- a/src/mds/MDLog.cc +++ b/src/mds/MDLog.cc @@ -173,8 +173,7 @@ void MDLog::submit_entry(LogEvent *le, Context *c) if (!g_conf->mds_log) { // hack: log is disabled. if (c) { - c->finish(0); - delete c; + c->complete(0); } return; } @@ -245,8 +244,7 @@ void MDLog::wait_for_safe(Context *c) journaler->wait_for_flush(c); } else { // hack: bypass. - c->finish(0); - delete c; + c->complete(0); } } @@ -442,8 +440,7 @@ void MDLog::replay(Context *c) if (journaler->get_read_pos() == journaler->get_write_pos()) { dout(10) << "replay - journal empty, done." 
<< dendl; if (c) { - c->finish(0); - delete c; + c->complete(0); } return; } diff --git a/src/mds/MDS.cc b/src/mds/MDS.cc index a867961ccf3..7dcf68822aa 100644 --- a/src/mds/MDS.cc +++ b/src/mds/MDS.cc @@ -1628,19 +1628,18 @@ void MDS::suicide() } timer.cancel_all_events(); //timer.join(); + timer.shutdown(); // shut down cache mdcache->shutdown(); if (objecter->initialized) objecter->shutdown_locked(); - - // shut down messenger - messenger->shutdown(); monc->shutdown(); - timer.shutdown(); + // shut down messenger + messenger->shutdown(); } void MDS::respawn() @@ -1890,8 +1889,7 @@ bool MDS::_dispatch(Message *m) ls.swap(finished_queue); while (!ls.empty()) { dout(10) << " finish " << ls.front() << dendl; - ls.front()->finish(0); - delete ls.front(); + ls.front()->complete(0); ls.pop_front(); // give other threads (beacon!) a chance diff --git a/src/mds/MDSTable.cc b/src/mds/MDSTable.cc index 4b21f4feaa5..ef0326dfbd3 100644 --- a/src/mds/MDSTable.cc +++ b/src/mds/MDSTable.cc @@ -160,7 +160,6 @@ void MDSTable::load_2(int r, bufferlist& bl, Context *onfinish) decode_state(p); if (onfinish) { - onfinish->finish(0); - delete onfinish; + onfinish->complete(0); } } diff --git a/src/mds/MDSTableClient.cc b/src/mds/MDSTableClient.cc index b4781497068..cc3152f1d67 100644 --- a/src/mds/MDSTableClient.cc +++ b/src/mds/MDSTableClient.cc @@ -61,8 +61,7 @@ void MDSTableClient::handle_request(class MMDSTableRequest *m) pending_prepare.erase(reqid); prepared_update[tid] = reqid; if (onfinish) { - onfinish->finish(0); - delete onfinish; + onfinish->complete(0); } } else if (prepared_update.count(tid)) { diff --git a/src/mds/Mutation.h b/src/mds/Mutation.h index c0bea19d16e..b50a03cefa4 100644 --- a/src/mds/Mutation.h +++ b/src/mds/Mutation.h @@ -330,8 +330,7 @@ struct MDSlaveUpdate { ~MDSlaveUpdate() { item.remove_myself(); if (waiter) - waiter->finish(0); - delete waiter; + waiter->complete(0); } }; diff --git a/src/mon/Monitor.cc b/src/mon/Monitor.cc index 7e484e8db6b..f537c915945 100644 --- a/src/mon/Monitor.cc +++ b/src/mon/Monitor.cc @@ -605,13 +605,13 @@ void Monitor::shutdown() finish_contexts(g_ceph_context, waitfor_quorum, -ECANCELED); finish_contexts(g_ceph_context, maybe_wait_for_quorum, -ECANCELED); - timer.shutdown(); + remove_all_sessions(); + // unlock before msgr shutdown... lock.Unlock(); - remove_all_sessions(); messenger->shutdown(); // last thing! ceph_mon.cc will delete mon. 
} diff --git a/src/mon/Paxos.cc b/src/mon/Paxos.cc index ee2ba3b6fdb..508669deef5 100644 --- a/src/mon/Paxos.cc +++ b/src/mon/Paxos.cc @@ -103,11 +103,21 @@ void Paxos::collect(version_t oldpn) // look for uncommitted value if (get_store()->exists(get_name(), last_committed+1)) { + version_t v = get_store()->get(get_name(), "pending_v"); + version_t pn = get_store()->get(get_name(), "pending_pn"); + if (v && pn && v == last_committed + 1) { + uncommitted_pn = pn; + } else { + dout(10) << "WARNING: no pending_pn on disk, using previous accepted_pn " << accepted_pn + << " and crossing our fingers" << dendl; + uncommitted_pn = accepted_pn; + } uncommitted_v = last_committed+1; - uncommitted_pn = accepted_pn; + get_store()->get(get_name(), last_committed+1, uncommitted_value); assert(uncommitted_value.length()); dout(10) << "learned uncommitted " << (last_committed+1) + << " pn " << uncommitted_pn << " (" << uncommitted_value.length() << " bytes) from myself" << dendl; } @@ -164,6 +174,8 @@ void Paxos::handle_collect(MMonPaxos *collect) last->last_committed = last_committed; last->first_committed = first_committed; + version_t previous_pn = accepted_pn; + // can we accept this pn? if (collect->pn > accepted_pn) { // ok, accept it @@ -198,13 +210,25 @@ void Paxos::handle_collect(MMonPaxos *collect) // do we have an accepted but uncommitted value? // (it'll be at last_committed+1) bufferlist bl; - if (get_store()->exists(get_name(), last_committed+1)) { + if (collect->last_committed == last_committed && + get_store()->exists(get_name(), last_committed+1)) { get_store()->get(get_name(), last_committed+1, bl); assert(bl.length() > 0); dout(10) << " sharing our accepted but uncommitted value for " << last_committed+1 << " (" << bl.length() << " bytes)" << dendl; last->values[last_committed+1] = bl; - last->uncommitted_pn = accepted_pn; + + version_t v = get_store()->get(get_name(), "pending_v"); + version_t pn = get_store()->get(get_name(), "pending_pn"); + if (v && pn && v == last_committed + 1) { + last->uncommitted_pn = pn; + } else { + // previously we didn't record which pn a value was accepted + // under! use the pn value we just had... :( + dout(10) << "WARNING: no pending_pn on disk, using previous accepted_pn " << previous_pn + << " and crossing our fingers" << dendl; + last->uncommitted_pn = previous_pn; + } } // send reply @@ -370,9 +394,13 @@ void Paxos::handle_last(MMonPaxos *last) return; } + assert(g_conf->paxos_kill_at != 1); + // store any committed values if any are specified in the message store_state(last); + assert(g_conf->paxos_kill_at != 2); + // do they accept your pn? if (last->pn > accepted_pn) { // no, try again. @@ -390,15 +418,23 @@ void Paxos::handle_last(MMonPaxos *last) << num_last << " peons" << dendl; // did this person send back an accepted but uncommitted value? 
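// An illustrative sketch (not the Paxos/MonitorDBStore API) of the rule the
// collect()/handle_collect() hunks above implement: when sharing an
// accepted-but-uncommitted value at last_committed+1, report the pn it was
// accepted under if the store recorded one ("pending_v"/"pending_pn"), and
// fall back to the current accepted_pn for stores written before this change.
#include <cstdint>
#include <map>
#include <string>

using version_t = uint64_t;

version_t uncommitted_pn_to_share(const std::map<std::string, version_t>& store,
                                  version_t last_committed,
                                  version_t accepted_pn)
{
  auto v = store.find("pending_v");
  auto pn = store.find("pending_pn");
  if (v != store.end() && pn != store.end() &&
      v->second == last_committed + 1 && pn->second != 0)
    return pn->second;      // value was recorded together with its pn: use it
  return accepted_pn;       // legacy store: best effort, "cross our fingers"
}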
- if (last->uncommitted_pn && - last->uncommitted_pn > uncommitted_pn) { - uncommitted_v = last->last_committed+1; - uncommitted_pn = last->uncommitted_pn; - uncommitted_value = last->values[uncommitted_v]; - dout(10) << "we learned an uncommitted value for " << uncommitted_v - << " pn " << uncommitted_pn - << " " << uncommitted_value.length() << " bytes" - << dendl; + if (last->uncommitted_pn) { + if (last->uncommitted_pn > uncommitted_pn && + last->last_committed >= last_committed && + last->last_committed + 1 >= uncommitted_v) { + uncommitted_v = last->last_committed+1; + uncommitted_pn = last->uncommitted_pn; + uncommitted_value = last->values[uncommitted_v]; + dout(10) << "we learned an uncommitted value for " << uncommitted_v + << " pn " << uncommitted_pn + << " " << uncommitted_value.length() << " bytes" + << dendl; + } else { + dout(10) << "ignoring uncommitted value for " << (last->last_committed+1) + << " pn " << last->uncommitted_pn + << " " << last->values[last->last_committed+1].length() << " bytes" + << dendl; + } } // is that everyone? @@ -502,6 +538,10 @@ void Paxos::begin(bufferlist& v) MonitorDBStore::Transaction t; t.put(get_name(), last_committed+1, new_value); + // note which pn this pending value is for. + t.put(get_name(), "pending_v", last_committed + 1); + t.put(get_name(), "pending_pn", accepted_pn); + dout(30) << __func__ << " transaction dump:\n"; JSONFormatter f(true); t.dump(&f); @@ -516,6 +556,8 @@ void Paxos::begin(bufferlist& v) get_store()->apply_transaction(t); + assert(g_conf->paxos_kill_at != 3); + if (mon->get_quorum().size() == 1) { // we're alone, take it easy commit(); @@ -566,6 +608,8 @@ void Paxos::handle_begin(MMonPaxos *begin) assert(begin->pn == accepted_pn); assert(begin->last_committed == last_committed); + assert(g_conf->paxos_kill_at != 4); + // set state. state = STATE_UPDATING; lease_expire = utime_t(); // cancel lease @@ -578,6 +622,10 @@ void Paxos::handle_begin(MMonPaxos *begin) MonitorDBStore::Transaction t; t.put(get_name(), v, begin->values[v]); + // note which pn this pending value is for. + t.put(get_name(), "pending_v", v); + t.put(get_name(), "pending_pn", accepted_pn); + dout(30) << __func__ << " transaction dump:\n"; JSONFormatter f(true); t.dump(&f); @@ -586,6 +634,8 @@ void Paxos::handle_begin(MMonPaxos *begin) get_store()->apply_transaction(t); + assert(g_conf->paxos_kill_at != 5); + // reply MMonPaxos *accept = new MMonPaxos(mon->get_epoch(), MMonPaxos::OP_ACCEPT, ceph_clock_now(g_ceph_context)); @@ -620,6 +670,8 @@ void Paxos::handle_accept(MMonPaxos *accept) accepted.insert(from); dout(10) << " now " << accepted << " have accepted" << dendl; + assert(g_conf->paxos_kill_at != 6); + // new majority? if (accepted.size() == (unsigned)mon->monmap->size()/2+1) { // yay, commit! @@ -643,6 +695,8 @@ void Paxos::handle_accept(MMonPaxos *accept) // yay! extend_lease(); + assert(g_conf->paxos_kill_at != 10); + finish_round(); // wake people up @@ -673,6 +727,8 @@ void Paxos::commit() // leader still got a majority and committed with out us.) lease_expire = utime_t(); // cancel lease + assert(g_conf->paxos_kill_at != 7); + MonitorDBStore::Transaction t; // commit locally @@ -692,6 +748,8 @@ void Paxos::commit() get_store()->apply_transaction(t); + assert(g_conf->paxos_kill_at != 8); + // refresh first_committed; this txn may have trimmed. 
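// A hedged illustration of the fault-injection idiom behind the
// assert(g_conf->paxos_kill_at != N) lines added above. "paxos_kill_at" is a
// test-only option (0 = disabled); setting it to N aborts the monitor the
// moment execution reaches the N-th marked point in the propose/accept/commit
// path, so tests can crash a mon at an exact step and exercise recovery.
// The names below are illustrative, not the Ceph implementation.
#include <cassert>

static int conf_paxos_kill_at = 0;   // stand-in for g_conf->paxos_kill_at

inline void kill_point(int n)
{
  // aborts only when the test explicitly asked to die at point n
  assert(conf_paxos_kill_at != n);
}

// e.g. kill_point(3) right after the pending value is written,
//      kill_point(7) just before the local commit is applied.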
first_committed = get_store()->get(get_name(), "first_committed"); @@ -713,6 +771,8 @@ void Paxos::commit() mon->messenger->send_message(commit, mon->monmap->get_inst(*p)); } + assert(g_conf->paxos_kill_at != 9); + // get ready for a new round. new_value.clear(); diff --git a/src/mon/Paxos.h b/src/mon/Paxos.h index cab27f289a8..69419e64ab9 100644 --- a/src/mon/Paxos.h +++ b/src/mon/Paxos.h @@ -290,8 +290,9 @@ private: */ version_t accepted_pn; /** - * @todo This has something to do with the last_committed version. Not sure - * about what it entails, tbh. + * The last_committed epoch of the leader at the time we accepted the last pn. + * + * This has NO SEMANTIC MEANING, and is there only for the debug output. */ version_t accepted_pn_from; /** @@ -1114,7 +1115,7 @@ public: * @param t The transaction to which we will append the operations * @param bl A bufferlist containing an encoded transaction */ - void decode_append_transaction(MonitorDBStore::Transaction& t, + static void decode_append_transaction(MonitorDBStore::Transaction& t, bufferlist& bl) { MonitorDBStore::Transaction vt; bufferlist::iterator it = bl.begin(); diff --git a/src/os/FDCache.h b/src/os/FDCache.h index cf07f860aa5..f0f40e7bbf4 100644 --- a/src/os/FDCache.h +++ b/src/os/FDCache.h @@ -28,6 +28,7 @@ * FD Cache */ class FDCache : public md_config_obs_t { +public: /** * FD * @@ -47,8 +48,10 @@ class FDCache : public md_config_obs_t { } }; +private: SharedLRU<hobject_t, FD> registry; CephContext *cct; + public: FDCache(CephContext *cct) : cct(cct) { assert(cct); diff --git a/src/os/FileStore.cc b/src/os/FileStore.cc index 10f2b1f2aad..17105c11d69 100644 --- a/src/os/FileStore.cc +++ b/src/os/FileStore.cc @@ -220,7 +220,8 @@ int FileStore::lfn_open(coll_t cid, r = get_index(cid, index); } Mutex::Locker l(fdcache_lock); - *outfd = fdcache.lookup(oid); + if (!replaying) + *outfd = fdcache.lookup(oid); if (*outfd) { return 0; } @@ -258,7 +259,10 @@ int FileStore::lfn_open(coll_t cid, goto fail; } } - *outfd = fdcache.add(oid, fd); + if (!replaying) + *outfd = fdcache.add(oid, fd); + else + *outfd = FDRef(new FDCache::FD(fd)); return 0; fail: @@ -3060,7 +3064,8 @@ int FileStore::_write(coll_t cid, const hobject_t& oid, r = bl.length(); // flush? 
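// A sketch (not the actual FileStore/FDCache API) of the replay guard the
// FileStore hunks around here add: while the journal is being replayed, object
// opens bypass the shared FD cache (the descriptor is wrapped privately
// instead) and writes skip writeback-throttle accounting, so replayed
// operations do not disturb cache or throttle state used by live requests.
#include <fcntl.h>
#include <unistd.h>
#include <functional>
#include <memory>
#include <string>

struct CachedFD {
  int fd;
  explicit CachedFD(int f) : fd(f) {}
  ~CachedFD() { if (fd >= 0) ::close(fd); }
};
using FDRefT = std::shared_ptr<CachedFD>;

// 'lookup'/'add' stand in for the real FD cache; 'replaying' is the flag consulted.
FDRefT open_for_object(const std::string& path, bool replaying,
                       const std::function<FDRefT(const std::string&)>& lookup,
                       const std::function<FDRefT(const std::string&, int)>& add)
{
  if (!replaying) {
    if (FDRefT cached = lookup(path))
      return cached;                       // normal path: share via the cache
  }
  int fd = ::open(path.c_str(), O_RDWR);
  if (fd < 0)
    return nullptr;
  return replaying ? std::make_shared<CachedFD>(fd)  // private, uncached ref
                   : add(path, fd);                  // publish to the cache
}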
- wbthrottle.queue_wb(fd, oid, offset, len, replica); + if (!replaying) + wbthrottle.queue_wb(fd, oid, offset, len, replica); lfn_close(fd); out: diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 464ed770df2..3f226cec95d 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -4739,11 +4739,12 @@ bool OSDService::prepare_to_stop() if (state != NOT_STOPPING) return false; - if (get_osdmap()->is_up(whoami)) { + OSDMapRef osdmap = get_osdmap(); + if (osdmap && osdmap->is_up(whoami)) { state = PREPARING_TO_STOP; monc->send_mon_message(new MOSDMarkMeDown(monc->get_fsid(), - get_osdmap()->get_inst(whoami), - get_osdmap()->get_epoch(), + osdmap->get_inst(whoami), + osdmap->get_epoch(), false )); utime_t now = ceph_clock_now(g_ceph_context); diff --git a/src/osd/PGLog.cc b/src/osd/PGLog.cc index 6ba08362dad..dac1f33fd91 100644 --- a/src/osd/PGLog.cc +++ b/src/osd/PGLog.cc @@ -375,7 +375,6 @@ void PGLog::rewind_divergent_log(ObjectStore::Transaction& t, eversion_t newhead } assert(p->version > newhead); dout(10) << "rewind_divergent_log future divergent " << *p << dendl; - log.unindex(*p); } log.head = newhead; @@ -383,6 +382,7 @@ void PGLog::rewind_divergent_log(ObjectStore::Transaction& t, eversion_t newhead if (info.last_complete > newhead) info.last_complete = newhead; + log.index(); for (list<pg_log_entry_t>::iterator d = divergent.begin(); d != divergent.end(); ++d) merge_old_entry(t, *d, info, remove_snap); @@ -505,7 +505,6 @@ void PGLog::merge_log(ObjectStore::Transaction& t, break; dout(10) << "merge_log divergent " << oe << dendl; divergent.push_front(oe); - log.unindex(oe); log.log.pop_back(); } diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index 9c8d42dbf3c..298d38d6ace 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -7767,6 +7767,14 @@ void ReplicatedPG::_scrub_finish() #undef dout_prefix #define dout_prefix *_dout << pg->gen_prefix() +ReplicatedPG::SnapTrimmer::~SnapTrimmer() +{ + while (!repops.empty()) { + (*repops.begin())->put(); + repops.erase(repops.begin()); + } +} + void ReplicatedPG::SnapTrimmer::log_enter(const char *state_name) { dout(20) << "enter " << state_name << dendl; diff --git a/src/osd/ReplicatedPG.h b/src/osd/ReplicatedPG.h index 0d4867f6e6d..9dafe23faa1 100644 --- a/src/osd/ReplicatedPG.h +++ b/src/osd/ReplicatedPG.h @@ -982,6 +982,7 @@ private: bool need_share_pg_info; bool requeue; SnapTrimmer(ReplicatedPG *pg) : pg(pg), need_share_pg_info(false), requeue(false) {} + ~SnapTrimmer(); void log_enter(const char *state_name); void log_exit(const char *state_name, utime_t duration); } snap_trimmer_machine; diff --git a/src/osdc/Filer.cc b/src/osdc/Filer.cc index 7eb4ad616db..8f94a97d292 100644 --- a/src/osdc/Filer.cc +++ b/src/osdc/Filer.cc @@ -131,8 +131,7 @@ void Filer::_probed(Probe *probe, const object_t& oid, uint64_t size, utime_t mt return; // waiting for more! if (probe->err) { // we hit an error, propagate back up - probe->onfinish->finish(probe->err); - delete probe->onfinish; + probe->onfinish->complete(probe->err); delete probe; return; } @@ -216,8 +215,7 @@ void Filer::_probed(Probe *probe, const object_t& oid, uint64_t size, utime_t mt } // done! finish and clean up. 
- probe->onfinish->finish(probe->err); - delete probe->onfinish; + probe->onfinish->complete(probe->err); delete probe; } @@ -285,8 +283,7 @@ void Filer::_do_purge_range(PurgeRange *pr, int fin) << " uncommitted " << pr->uncommitted << dendl; if (pr->num == 0 && pr->uncommitted == 0) { - pr->oncommit->finish(0); - delete pr->oncommit; + pr->oncommit->complete(0); delete pr; return; } diff --git a/src/osdc/Journaler.cc b/src/osdc/Journaler.cc index cd9b9edc4c7..ba4ca8dc4b9 100644 --- a/src/osdc/Journaler.cc +++ b/src/osdc/Journaler.cc @@ -181,8 +181,7 @@ void Journaler::_finish_reread_head(int r, bufferlist& bl, Context *finish) trimmed_pos = trimming_pos = h.trimmed_pos; init_headers(h); state = STATE_ACTIVE; - finish->finish(r); - delete finish; + finish->complete(r); } void Journaler::_finish_read_head(int r, bufferlist& bl) @@ -261,8 +260,7 @@ void Journaler::_finish_reprobe(int r, uint64_t new_end, Context *onfinish) { << dendl; prezeroing_pos = prezero_pos = write_pos = flush_pos = safe_pos = new_end; state = STATE_ACTIVE; - onfinish->finish(r); - delete onfinish; + onfinish->complete(r); } void Journaler::_finish_probe_end(int r, uint64_t end) @@ -367,8 +365,7 @@ void Journaler::_finish_write_head(int r, Header &wrote, Context *oncommit) ldout(cct, 10) << "_finish_write_head " << wrote << dendl; last_committed = wrote; if (oncommit) { - oncommit->finish(r); - delete oncommit; + oncommit->complete(r); } trim(); // trim? @@ -563,8 +560,7 @@ void Journaler::wait_for_flush(Context *onsafe) ldout(cct, 10) << "flush nothing to flush, (prezeroing/prezero)/write/flush/safe pointers at " << "(" << prezeroing_pos << "/" << prezero_pos << ")/" << write_pos << "/" << flush_pos << "/" << safe_pos << dendl; if (onsafe) { - onsafe->finish(0); - delete onsafe; + onsafe->complete(0); onsafe = 0; } return; @@ -584,8 +580,7 @@ void Journaler::flush(Context *onsafe) ldout(cct, 10) << "flush nothing to flush, (prezeroing/prezero)/write/flush/safe pointers at " << "(" << prezeroing_pos << "/" << prezero_pos << ")/" << write_pos << "/" << flush_pos << "/" << safe_pos << dendl; if (onsafe) { - onsafe->finish(0); - delete onsafe; + onsafe->complete(0); } } else { if (1) { @@ -731,8 +726,7 @@ void Journaler::_finish_read(int r, uint64_t offset, bufferlist& bl) if (on_readable) { Context *f = on_readable; on_readable = 0; - f->finish(r); - delete f; + f->complete(r); } return; } @@ -779,8 +773,7 @@ void Journaler::_assimilate_prefetch() if (on_readable) { Context *f = on_readable; on_readable = 0; - f->finish(0); - delete f; + f->complete(0); } } } @@ -1060,8 +1053,7 @@ void Journaler::handle_write_error(int r) { lderr(cct) << "handle_write_error " << cpp_strerror(r) << dendl; if (on_write_error) { - on_write_error->finish(r); - delete on_write_error; + on_write_error->complete(r); on_write_error = NULL; } else { assert(0 == "unhandled write error"); diff --git a/src/osdc/Objecter.cc b/src/osdc/Objecter.cc index a5a023cb33e..9933f853f8f 100644 --- a/src/osdc/Objecter.cc +++ b/src/osdc/Objecter.cc @@ -321,8 +321,7 @@ void Objecter::_linger_ack(LingerOp *info, int r) { ldout(cct, 10) << "_linger_ack " << info->linger_id << dendl; if (info->on_reg_ack) { - info->on_reg_ack->finish(r); - delete info->on_reg_ack; + info->on_reg_ack->complete(r); info->on_reg_ack = NULL; } } @@ -331,8 +330,7 @@ void Objecter::_linger_commit(LingerOp *info, int r) { ldout(cct, 10) << "_linger_commit " << info->linger_id << dendl; if (info->on_reg_commit) { - info->on_reg_commit->finish(r); - delete info->on_reg_commit; + 
info->on_reg_commit->complete(r); info->on_reg_commit = NULL; } @@ -676,8 +674,7 @@ void Objecter::handle_osd_map(MOSDMap *m) //go through the list and call the onfinish methods for (list<pair<Context*, int> >::iterator i = p->second.begin(); i != p->second.end(); ++i) { - i->first->finish(i->second); - delete i->first; + i->first->complete(i->second); } waiting_for_map.erase(p++); } @@ -1622,12 +1619,10 @@ void Objecter::handle_osd_op_reply(MOSDOpReply *m) // do callbacks if (onack) { - onack->finish(rc); - delete onack; + onack->complete(rc); } if (oncommit) { - oncommit->finish(rc); - delete oncommit; + oncommit->complete(rc); } m->put(); @@ -1646,8 +1641,7 @@ void Objecter::list_objects(ListContext *list_context, Context *onfinish) { << "\nlist_context->cookie" << list_context->cookie << dendl; if (list_context->at_end) { - onfinish->finish(0); - delete onfinish; + onfinish->complete(0); return; } @@ -1667,8 +1661,7 @@ void Objecter::list_objects(ListContext *list_context, Context *onfinish) { list_context->starting_pg_num = pg_num; } if (list_context->current_pg == pg_num){ //this context got all the way through - onfinish->finish(0); - delete onfinish; + onfinish->complete(0); return; } @@ -1722,9 +1715,8 @@ void Objecter::_list_reply(ListContext *list_context, int r, bufferlist *bl, ldout(cct, 20) << "got a response with objects, proceeding" << dendl; list_context->list.merge(response.entries); if (response_size >= list_context->max_entries) { - final_finish->finish(0); + final_finish->complete(0); delete bl; - delete final_finish; return; } @@ -1756,8 +1748,7 @@ void Objecter::_list_reply(ListContext *list_context, int r, bufferlist *bl, ldout(cct, 20) << "out of pgs, returning to" << final_finish << dendl; list_context->at_end = true; delete bl; - final_finish->finish(0); - delete final_finish; + final_finish->complete(0); return; } @@ -1799,8 +1790,7 @@ struct C_SelfmanagedSnap : public Context { bufferlist::iterator p = bl.begin(); ::decode(*psnapid, p); } - fin->finish(r); - delete fin; + fin->complete(r); } }; @@ -1975,8 +1965,7 @@ void Objecter::handle_pool_op_reply(MPoolOpReply *m) wait_for_new_map(op->onfinish, m->epoch, m->replyCode); } else { - op->onfinish->finish(m->replyCode); - delete op->onfinish; + op->onfinish->complete(m->replyCode); } op->onfinish = NULL; delete op; @@ -2033,8 +2022,7 @@ void Objecter::handle_get_pool_stats_reply(MGetPoolStatsReply *m) *op->pool_stats = m->pool_stats; if (m->version > last_seen_pgmap_version) last_seen_pgmap_version = m->version; - op->onfinish->finish(0); - delete op->onfinish; + op->onfinish->complete(0); poolstat_ops.erase(tid); delete op; @@ -2085,8 +2073,7 @@ void Objecter::handle_fs_stats_reply(MStatfsReply *m) *(op->stats) = m->h.st; if (m->h.version > last_seen_pgmap_version) last_seen_pgmap_version = m->h.version; - op->onfinish->finish(0); - delete op->onfinish; + op->onfinish->complete(0); statfs_ops.erase(tid); delete op; @@ -2128,8 +2115,7 @@ void Objecter::_sg_read_finish(vector<ObjectExtent>& extents, vector<bufferlist> ldout(cct, 7) << "_sg_read_finish " << bytes_read << " bytes" << dendl; if (onfinish) { - onfinish->finish(bytes_read);// > 0 ? bytes_read:m->get_result()); - delete onfinish; + onfinish->complete(bytes_read);// > 0 ? 
bytes_read:m->get_result()); } } diff --git a/src/osdc/Objecter.h b/src/osdc/Objecter.h index c1cac88b60e..b593bef69d9 100644 --- a/src/osdc/Objecter.h +++ b/src/osdc/Objecter.h @@ -860,8 +860,7 @@ public: if (pmtime) *pmtime = m; } - fin->finish(r); - delete fin; + fin->complete(r); } }; @@ -875,8 +874,7 @@ public: bufferlist::iterator p = bl.begin(); ::decode(attrset, p); } - fin->finish(r); - delete fin; + fin->complete(r); } }; @@ -916,8 +914,7 @@ public: if (r >= 0) { objecter->_list_reply(list_context, r, bl, final_finish, epoch); } else { - final_finish->finish(r); - delete final_finish; + final_finish->complete(r); } } }; diff --git a/src/pybind/ceph_argparse.py b/src/pybind/ceph_argparse.py index 72b36dd50a5..73d1115f645 100644 --- a/src/pybind/ceph_argparse.py +++ b/src/pybind/ceph_argparse.py @@ -321,18 +321,20 @@ class CephName(CephArgtype): Also accept '*' """ + def __init__(self): + self.nametype = None + self.nameid = None + def valid(self, s, partial=False): if s == '*': self.val = s - self.nametype = None - self.nameid = None return if s.find('.') == -1: raise ArgumentFormat('CephName: no . in {0}'.format(s)) else: t, i = s.split('.') if not t in ('osd', 'mon', 'client', 'mds'): - raise ArgumentValid('unknown type ' + self.t) + raise ArgumentValid('unknown type ' + t) if t == 'osd': if i != '*': try: @@ -352,19 +354,21 @@ class CephOsdName(CephArgtype): osd.<id>, or <id>, or *, where id is a base10 int """ + def __init__(self): + self.nametype = None + self.nameid = None + def valid(self, s, partial=False): if s == '*': self.val = s - self.nametype = None - self.nameid = None return if s.find('.') != -1: t, i = s.split('.') + if t != 'osd': + raise ArgumentValid('unknown type ' + t) else: t = 'osd' i = s - if t != 'osd': - raise ArgumentValid('unknown type ' + self.t) try: i = int(i) except: @@ -381,7 +385,7 @@ class CephChoices(CephArgtype): Set of string literals; init with valid choices """ def __init__(self, strings='', **kwargs): - self.strings=strings.split('|') + self.strings = strings.split('|') def valid(self, s, partial=False): if not partial: @@ -523,16 +527,16 @@ class argdesc(object): def __repr__(self): r = 'argdesc(' + str(self.t) + ', ' internals = ['N', 'typeargs', 'instance', 't'] - for (k,v) in self.__dict__.iteritems(): + for (k, v) in self.__dict__.iteritems(): if k.startswith('__') or k in internals: pass else: # undo modification from __init__ if k == 'n' and self.N: v = 'N' - r += '{0}={1}, '.format(k,v) - for (k,v) in self.typeargs.iteritems(): - r += '{0}={1}, '.format(k,v) + r += '{0}={1}, '.format(k, v) + for (k, v) in self.typeargs.iteritems(): + r += '{0}={1}, '.format(k, v) return r[:-2] + ')' def __str__(self): @@ -698,7 +702,7 @@ def matchnum(args, signature, partial=False): while desc.numseen < desc.n: # if there are no more arguments, return if not words: - return matchcnt; + return matchcnt word = words.pop(0) try: @@ -887,7 +891,7 @@ def validate_command(parsed_args, sigdict, args, verbose=False): return valid_dict -def send_command(cluster, target=('mon', ''), cmd=[], inbuf='', timeout=0, +def send_command(cluster, target=('mon', ''), cmd=None, inbuf='', timeout=0, verbose=False): """ Send a command to a daemon using librados's @@ -900,6 +904,7 @@ def send_command(cluster, target=('mon', ''), cmd=[], inbuf='', timeout=0, If target is osd.N, send command to that osd (except for pgid cmds) """ + cmd = cmd or [] try: if target[0] == 'osd': osdid = target[1] diff --git a/src/rgw/rgw_auth_s3.cc b/src/rgw/rgw_auth_s3.cc index 
bdd458e68b6..c93de7cd58a 100644 --- a/src/rgw/rgw_auth_s3.cc +++ b/src/rgw/rgw_auth_s3.cc @@ -190,8 +190,14 @@ bool rgw_create_s3_canonical_header(req_info& info, utime_t *header_time, string map<string, string>& meta_map = info.x_meta_map; map<string, string>& sub_resources = info.args.get_sub_resources(); + string request_uri; + if (info.effective_uri.empty()) + request_uri = info.request_uri; + else + request_uri = info.effective_uri; + rgw_create_s3_canonical_header(info.method, content_md5, content_type, date.c_str(), - meta_map, info.request_uri.c_str(), sub_resources, + meta_map, request_uri.c_str(), sub_resources, dest); return true; diff --git a/src/rgw/rgw_common.cc b/src/rgw/rgw_common.cc index aea396bf3de..8a281775d07 100644 --- a/src/rgw/rgw_common.cc +++ b/src/rgw/rgw_common.cc @@ -109,7 +109,12 @@ void req_info::rebuild_from(req_info& src) { method = src.method; script_uri = src.script_uri; - request_uri = src.request_uri; + if (src.effective_uri.empty()) { + request_uri = src.request_uri; + } else { + request_uri = src.effective_uri; + } + effective_uri.clear(); host = src.host; x_meta_map = src.x_meta_map; diff --git a/src/rgw/rgw_common.h b/src/rgw/rgw_common.h index 1d3596d4418..7f224a798f5 100644 --- a/src/rgw/rgw_common.h +++ b/src/rgw/rgw_common.h @@ -764,6 +764,7 @@ struct req_info { const char *method; string script_uri; string request_uri; + string effective_uri; string request_params; req_info(CephContext *cct, RGWEnv *_env); @@ -780,7 +781,7 @@ struct req_state { int format; ceph::Formatter *formatter; string decoded_uri; - string effective_uri; + string relative_uri; const char *length; uint64_t content_length; map<string, string> generic_attrs; diff --git a/src/rgw/rgw_op.cc b/src/rgw/rgw_op.cc index 45477486ccc..7760a2f5c52 100644 --- a/src/rgw/rgw_op.cc +++ b/src/rgw/rgw_op.cc @@ -1654,6 +1654,25 @@ int RGWCopyObj::init_common() return 0; } +static void copy_obj_progress_cb(off_t ofs, void *param) +{ + RGWCopyObj *op = static_cast<RGWCopyObj *>(param); + op->progress_cb(ofs); +} + +void RGWCopyObj::progress_cb(off_t ofs) +{ + if (!s->cct->_conf->rgw_copy_obj_progress) + return; + + if (ofs - last_ofs < s->cct->_conf->rgw_copy_obj_progress_every_bytes) + return; + + send_partial_response(ofs); + + last_ofs = ofs; +} + void RGWCopyObj::execute() { rgw_obj src_obj, dst_obj; @@ -1685,7 +1704,9 @@ void RGWCopyObj::execute() replace_attrs, attrs, RGW_OBJ_CATEGORY_MAIN, &s->req_id, /* use req_id as tag */ - &s->err); + &s->err, + copy_obj_progress_cb, (void *)this + ); } int RGWGetACLs::verify_permission() diff --git a/src/rgw/rgw_op.h b/src/rgw/rgw_op.h index e107b90a155..5da2e4f472c 100644 --- a/src/rgw/rgw_op.h +++ b/src/rgw/rgw_op.h @@ -438,6 +438,8 @@ protected: string client_id; string op_id; + off_t last_ofs; + int init_common(); @@ -460,6 +462,7 @@ public: ret = 0; mtime = 0; replace_attrs = false; + last_ofs = 0; } virtual void init(RGWRados *store, struct req_state *s, RGWHandler *h) { @@ -468,9 +471,11 @@ public: } int verify_permission(); void execute(); + void progress_cb(off_t ofs); virtual int init_dest_policy() { return 0; } virtual int get_params() = 0; + virtual void send_partial_response(off_t ofs) {} virtual void send_response() = 0; virtual const string name() { return "copy_obj"; } virtual uint32_t op_mask() { return RGW_OP_TYPE_WRITE; } diff --git a/src/rgw/rgw_rados.cc b/src/rgw/rgw_rados.cc index 0c7b22a42d3..8af03b03a8f 100644 --- a/src/rgw/rgw_rados.cc +++ b/src/rgw/rgw_rados.cc @@ -2397,9 +2397,16 @@ class RGWRadosPutObj : public 
RGWGetDataCB rgw_obj obj; RGWPutObjProcessor_Atomic *processor; RGWOpStateSingleOp *opstate; + void (*progress_cb)(off_t, void *); + void *progress_data; public: - RGWRadosPutObj(RGWPutObjProcessor_Atomic *p, RGWOpStateSingleOp *_ops) : processor(p), opstate(_ops) {} + RGWRadosPutObj(RGWPutObjProcessor_Atomic *p, RGWOpStateSingleOp *_ops, + void (*_progress_cb)(off_t, void *), void *_progress_data) : processor(p), opstate(_ops), + progress_cb(_progress_cb), + progress_data(_progress_data) {} int handle_data(bufferlist& bl, off_t ofs, off_t len) { + progress_cb(ofs, progress_data); + void *handle; int ret = processor->handle_data(bl, ofs, &handle); if (ret < 0) @@ -2477,7 +2484,9 @@ int RGWRados::copy_obj(void *ctx, map<string, bufferlist>& attrs, RGWObjCategory category, string *ptag, - struct rgw_err *err) + struct rgw_err *err, + void (*progress_cb)(off_t, void *), + void *progress_data) { int ret; uint64_t total_len, obj_size; @@ -2545,7 +2554,7 @@ int RGWRados::copy_obj(void *ctx, ldout(cct, 0) << "ERROR: failed to set opstate ret=" << ret << dendl; return ret; } - RGWRadosPutObj cb(&processor, &opstate); + RGWRadosPutObj cb(&processor, &opstate, progress_cb, progress_data); string etag; map<string, string> req_headers; time_t set_mtime; diff --git a/src/rgw/rgw_rados.h b/src/rgw/rgw_rados.h index c9924e0dc56..bcc40900299 100644 --- a/src/rgw/rgw_rados.h +++ b/src/rgw/rgw_rados.h @@ -1121,7 +1121,9 @@ public: map<std::string, bufferlist>& attrs, RGWObjCategory category, string *ptag, - struct rgw_err *err); + struct rgw_err *err, + void (*progress_cb)(off_t, void *), + void *progress_data); int copy_obj_data(void *ctx, void *handle, off_t end, diff --git a/src/rgw/rgw_rest.cc b/src/rgw/rgw_rest.cc index 0f9e61d1740..e4933a67a39 100644 --- a/src/rgw/rgw_rest.cc +++ b/src/rgw/rgw_rest.cc @@ -1242,7 +1242,7 @@ RGWHandler *RGWREST::get_handler(RGWRados *store, struct req_state *s, RGWClient if (*init_error < 0) return NULL; - RGWRESTMgr *m = mgr.get_resource_mgr(s, s->decoded_uri, &s->effective_uri); + RGWRESTMgr *m = mgr.get_resource_mgr(s, s->decoded_uri, &s->relative_uri); if (!m) { *init_error = -ERR_METHOD_NOT_ALLOWED; return NULL; diff --git a/src/rgw/rgw_rest_client.cc b/src/rgw/rgw_rest_client.cc index 2075e535525..ea80b5b84f8 100644 --- a/src/rgw/rgw_rest_client.cc +++ b/src/rgw/rgw_rest_client.cc @@ -403,6 +403,7 @@ int RGWRESTStreamWriteRequest::put_obj_init(RGWAccessKey& key, rgw_obj& obj, uin new_info.script_uri = "/"; new_info.script_uri.append(resource); new_info.request_uri = new_info.script_uri; + new_info.effective_uri = new_info.effective_uri; map<string, string>& m = new_env.get_map(); map<string, bufferlist>::iterator bliter; @@ -568,6 +569,7 @@ int RGWRESTStreamReadRequest::get_obj(RGWAccessKey& key, map<string, string>& ex new_info.script_uri = "/"; new_info.script_uri.append(resource); new_info.request_uri = new_info.script_uri; + new_info.effective_uri = new_info.effective_uri; new_info.init_meta_info(NULL); diff --git a/src/rgw/rgw_rest_s3.cc b/src/rgw/rgw_rest_s3.cc index 66f6652ec6a..6c1738218e6 100644 --- a/src/rgw/rgw_rest_s3.cc +++ b/src/rgw/rgw_rest_s3.cc @@ -1300,15 +1300,33 @@ int RGWCopyObj_ObjStore_S3::get_params() return 0; } -void RGWCopyObj_ObjStore_S3::send_response() +void RGWCopyObj_ObjStore_S3::send_partial_response(off_t ofs) { - if (ret) + if (!sent_header) { + if (ret) set_req_state_err(s, ret); - dump_errno(s); + dump_errno(s); + + end_header(s, "binary/octet-stream"); + if (ret == 0) { + s->formatter->open_object_section("CopyObjectResult"); 
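// An illustrative sketch (not the rgw classes themselves) of the throttling
// rule progress_cb() applies in this change: partial progress is emitted at
// most once per rgw_copy_obj_progress_every_bytes of copied data, and only
// when rgw_copy_obj_progress is enabled, so a long (e.g. cross-zone) copy
// keeps the client connection alive without flooding it with responses.
#include <cstdint>
#include <functional>

struct CopyProgressThrottle {
  bool enabled = true;                 // rgw_copy_obj_progress
  int64_t every_bytes = 1024 * 1024;   // rgw_copy_obj_progress_every_bytes
  int64_t last_ofs = 0;
  std::function<void(int64_t)> emit;   // e.g. send_partial_response(ofs)

  void on_data(int64_t ofs) {
    if (!enabled || ofs - last_ofs < every_bytes)
      return;
    if (emit)
      emit(ofs);                       // S3 dumps a Progress element,
    last_ofs = ofs;                    // Swift appends an offset to a JSON array
  }
};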
+ } + sent_header = true; + } else { + /* Send progress field. Note that this diverge from the original S3 + * spec. We do this in order to keep connection alive. + */ + s->formatter->dump_int("Progress", (uint64_t)ofs); + } + rgw_flush_formatter(s, s->formatter); +} + +void RGWCopyObj_ObjStore_S3::send_response() +{ + if (!sent_header) + send_partial_response(0); - end_header(s, "binary/octet-stream"); if (ret == 0) { - s->formatter->open_object_section("CopyObjectResult"); dump_time(s, "LastModified", &mtime); map<string, bufferlist>::iterator iter = attrs.find(RGW_ATTR_ETAG); if (iter != attrs.end()) { @@ -1801,7 +1819,7 @@ int RGWHandler_ObjStore_S3::init_from_header(struct req_state *s, int default_fo string req; string first; - const char *req_name = s->effective_uri.c_str(); + const char *req_name = s->relative_uri.c_str(); const char *p; if (*req_name == '?') { diff --git a/src/rgw/rgw_rest_s3.h b/src/rgw/rgw_rest_s3.h index e2a1b0b92eb..a0af4eac9fd 100644 --- a/src/rgw/rgw_rest_s3.h +++ b/src/rgw/rgw_rest_s3.h @@ -143,12 +143,14 @@ public: }; class RGWCopyObj_ObjStore_S3 : public RGWCopyObj_ObjStore { + bool sent_header; public: - RGWCopyObj_ObjStore_S3() {} + RGWCopyObj_ObjStore_S3() : sent_header(false) {} ~RGWCopyObj_ObjStore_S3() {} int init_dest_policy(); int get_params(); + void send_partial_response(off_t ofs); void send_response(); }; diff --git a/src/rgw/rgw_rest_swift.cc b/src/rgw/rgw_rest_swift.cc index 157158e7ed7..b4f830830f9 100644 --- a/src/rgw/rgw_rest_swift.cc +++ b/src/rgw/rgw_rest_swift.cc @@ -288,6 +288,8 @@ int RGWCreateBucket_ObjStore_SWIFT::get_params() { policy.create_default(s->user.user_id, s->user.display_name); + location_constraint = store->region.api_name; + return 0; } @@ -475,13 +477,40 @@ int RGWCopyObj_ObjStore_SWIFT::get_params() return 0; } +void RGWCopyObj_ObjStore_SWIFT::send_partial_response(off_t ofs) +{ + if (!sent_header) { + if (!ret) + ret = STATUS_CREATED; + set_req_state_err(s, ret); + dump_errno(s); + end_header(s); + + /* Send progress information. Note that this diverge from the original swift + * spec. We do this in order to keep connection alive. 
+ */ + if (ret == 0) { + s->formatter->open_array_section("progress"); + } + sent_header = true; + } else { + s->formatter->dump_int("ofs", (uint64_t)ofs); + } + rgw_flush_formatter(s, s->formatter); +} + void RGWCopyObj_ObjStore_SWIFT::send_response() { - if (!ret) - ret = STATUS_CREATED; - set_req_state_err(s, ret); - dump_errno(s); - end_header(s); + if (!sent_header) { + if (!ret) + ret = STATUS_CREATED; + set_req_state_err(s, ret); + dump_errno(s); + end_header(s); + } else { + s->formatter->close_section(); + rgw_flush_formatter(s, s->formatter); + } } int RGWGetObj_ObjStore_SWIFT::send_response_data(bufferlist& bl, off_t bl_ofs, off_t bl_len) @@ -829,11 +858,16 @@ int RGWHandler_ObjStore_SWIFT::init_from_header(struct req_state *s) s->bucket_name_str = first; s->bucket_name = strdup(s->bucket_name_str.c_str()); + + s->info.effective_uri = "/" + s->bucket_name_str; + if (req.size()) { s->object_str = req; s->object = strdup(s->object_str.c_str()); + s->info.effective_uri.append("/" + s->object_str); } + return 0; } diff --git a/src/rgw/rgw_rest_swift.h b/src/rgw/rgw_rest_swift.h index e4b6f0bccee..1c23ab29204 100644 --- a/src/rgw/rgw_rest_swift.h +++ b/src/rgw/rgw_rest_swift.h @@ -100,13 +100,15 @@ public: }; class RGWCopyObj_ObjStore_SWIFT : public RGWCopyObj_ObjStore { + bool sent_header; public: - RGWCopyObj_ObjStore_SWIFT() {} + RGWCopyObj_ObjStore_SWIFT() : sent_header(false) {} ~RGWCopyObj_ObjStore_SWIFT() {} int init_dest_policy(); int get_params(); void send_response(); + void send_partial_response(off_t ofs); }; class RGWGetACLs_ObjStore_SWIFT : public RGWGetACLs_ObjStore { diff --git a/src/test/ObjectMap/KeyValueDBMemory.h b/src/test/ObjectMap/KeyValueDBMemory.h index baed9de28e0..93d0809d491 100644 --- a/src/test/ObjectMap/KeyValueDBMemory.h +++ b/src/test/ObjectMap/KeyValueDBMemory.h @@ -104,8 +104,7 @@ public: for (list<Context *>::iterator i = on_commit.begin(); i != on_commit.end(); on_commit.erase(i++)) { - (*i)->finish(0); - delete *i; + (*i)->complete(0); } return 0; } diff --git a/src/test/filestore/workload_generator.h b/src/test/filestore/workload_generator.h index 6a63b353c61..80e95dae6ec 100644 --- a/src/test/filestore/workload_generator.h +++ b/src/test/filestore/workload_generator.h @@ -163,7 +163,7 @@ public: : stat_state(state), ctx(context) { } void finish(int r) { - ctx->finish(r); + ctx->complete(r); stat_state->wrkldgen->m_stats_lock.Lock(); diff --git a/src/test/gather.cc b/src/test/gather.cc index 92bec7650c6..e067ceed8f9 100644 --- a/src/test/gather.cc +++ b/src/test/gather.cc @@ -40,8 +40,7 @@ TEST(ContextGather, OneSub) { C_Checker *checker = new C_Checker(&finish_called, &result); gather.set_finisher(checker); gather.activate(); - sub->finish(0); - delete sub; + sub->complete(0); EXPECT_TRUE(finish_called); EXPECT_EQ(0, result); } @@ -63,14 +62,12 @@ TEST(ContextGather, ManySubs) { //finish all except one sub for (int j = 0; j < sub_count - 1; ++j) { - subs[j]->finish(0); - delete subs[j]; + subs[j]->complete(0); EXPECT_FALSE(finish_called); } //finish last one and check asserts - subs[sub_count-1]->finish(0); - delete subs[sub_count-1]; + subs[sub_count-1]->complete(0); EXPECT_TRUE(finish_called); } @@ -92,16 +89,14 @@ TEST(ContextGather, AlternatingSubCreateFinish) { //alternate finishing first half of subs and creating last half of subs for (int j = 0; j < sub_count / 2; ++j) { - subs[j]->finish(0); - delete subs[j]; + subs[j]->complete(0); subs[sub_count / 2 + j] = gather.new_sub(); } gather.activate(); //finish last half of subs for (int k = 
sub_count / 2; k < sub_count; ++k) { - subs[k]->finish(0); - delete subs[k]; + subs[k]->complete(0); } EXPECT_TRUE(finish_called); diff --git a/src/test/osd/TestPGLog.cc b/src/test/osd/TestPGLog.cc index d8ec8d03df2..e0863f726a0 100644 --- a/src/test/osd/TestPGLog.cc +++ b/src/test/osd/TestPGLog.cc @@ -82,6 +82,10 @@ TEST_F(PGLogTest, rewind_divergent_log) { hobject_t divergent_object; eversion_t divergent_version; eversion_t newhead; + + hobject_t divergent; + divergent.hash = 0x9; + { pg_log_entry_t e; @@ -90,16 +94,16 @@ TEST_F(PGLogTest, rewind_divergent_log) { log.tail = e.version; log.log.push_back(e); e.version = newhead = eversion_t(1, 4); - e.soid.hash = 0x9; + e.soid = divergent; e.op = pg_log_entry_t::MODIFY; log.log.push_back(e); - log.index(); e.version = divergent_version = eversion_t(1, 5); - e.soid.hash = 0x9; + e.soid = divergent; divergent_object = e.soid; e.op = pg_log_entry_t::DELETE; log.log.push_back(e); log.head = e.version; + log.index(); info.last_update = log.head; info.last_complete = log.head; @@ -118,6 +122,7 @@ TEST_F(PGLogTest, rewind_divergent_log) { rewind_divergent_log(t, newhead, info, remove_snap, dirty_info, dirty_big_info); + EXPECT_TRUE(log.objects.count(divergent)); EXPECT_TRUE(missing.is_missing(divergent_object)); EXPECT_EQ(1U, log.objects.count(divergent_object)); EXPECT_EQ(2U, log.log.size()); diff --git a/src/tools/ceph-monstore-tool.cc b/src/tools/ceph-monstore-tool.cc index ae608a302f2..f361266aff0 100644 --- a/src/tools/ceph-monstore-tool.cc +++ b/src/tools/ceph-monstore-tool.cc @@ -31,6 +31,7 @@ #include "global/global_init.h" #include "os/LevelDBStore.h" #include "mon/MonitorDBStore.h" +#include "mon/Paxos.h" #include "common/Formatter.h" namespace po = boost::program_options; @@ -246,6 +247,19 @@ int main(int argc, char **argv) { goto done; } bl.write_fd(fd); + } else if (cmd == "dump-paxos") { + for (version_t v = dstart; v <= dstop; ++v) { + bufferlist bl; + st.get("paxos", v, bl); + if (bl.length() == 0) + break; + cout << "\n--- " << v << " ---" << std::endl; + MonitorDBStore::Transaction tx; + Paxos::decode_append_transaction(tx, bl); + JSONFormatter f(true); + tx.dump(&f); + f.flush(cout); + } } else if (cmd == "dump-trace") { if (tfile.empty()) { std::cerr << "Need trace_file" << std::endl; |