summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSage Weil <sage@inktank.com>2013-08-16 10:52:02 -0700
committerSage Weil <sage@inktank.com>2013-08-16 10:52:02 -0700
commit411871f6bcc9a4b81140c2e98d13dc123860f6f7 (patch)
treef93f4d8736c90aa6c426836cc0cb9fdc20502b1d
parent50698d1862065c8d74338fd08c7e5af66e222490 (diff)
downloadceph-411871f6bcc9a4b81140c2e98d13dc123860f6f7.tar.gz
mon/OSDMonitor: make 'osd pool rmsnap ...' not racy/crashy
NOTE: This is a manual backport of d90683fdeda15b726dcf0a7cab7006c31e99f14. Due to all kinds of collateral changes in the mon the original patch doesn't cleanly apply. Ensure that the snap does in fact exist before we try to remove it. This avoids a crash where a we get two dup rmsnap requests (due to thrashing, or a reconnect, or something), the committed (p) value does have the snap, but the uncommitted (pp) does not. This fails the old test such that we try to remove it from pp again, and assert. Restructure the flow so that it is easier to distinguish the committed short return from the uncommitted return (which must still wait for the commit). 0> 2013-07-16 14:21:27.189060 7fdf301e9700 -1 osd/osd_types.cc: In function 'void pg_pool_t::remove_snap(snapid_t)' thread 7fdf301e9700 time 2013-07-16 14:21:27.187095 osd/osd_types.cc: 662: FAILED assert(snaps.count(s)) ceph version 0.66-602-gcd39d8a (cd39d8a6727d81b889869e98f5869e4227b50720) 1: (pg_pool_t::remove_snap(snapid_t)+0x6d) [0x7ad6dd] 2: (OSDMonitor::prepare_command(MMonCommand*)+0x6407) [0x5c1517] 3: (OSDMonitor::prepare_update(PaxosServiceMessage*)+0x1fb) [0x5c41ab] 4: (PaxosService::dispatch(PaxosServiceMessage*)+0x937) [0x598c87] 5: (Monitor::handle_command(MMonCommand*)+0xe56) [0x56ec36] 6: (Monitor::_ms_dispatch(Message*)+0xd1d) [0x5719ad] 7: (Monitor::handle_forward(MForward*)+0x821) [0x572831] 8: (Monitor::_ms_dispatch(Message*)+0xe44) [0x571ad4] 9: (Monitor::ms_dispatch(Message*)+0x32) [0x588c52] 10: (DispatchQueue::entry()+0x549) [0x7cf1d9] 11: (DispatchQueue::DispatchThread::entry()+0xd) [0x7060fd] 12: (()+0x7e9a) [0x7fdf35165e9a] 13: (clone()+0x6d) [0x7fdf334fcccd] NOTE: a copy of the executable, or `objdump -rdS <executable>` is needed to interpret this. Signed-off-by: Sage Weil <sage@inktank.com> Reviewed-by: Joao Eduardo Luis <joao.luis@inktank.com>
-rw-r--r--src/mon/OSDMonitor.cc52
1 files changed, 28 insertions, 24 deletions
diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc
index ad4e5f1d4a1..3bec49ba614 100644
--- a/src/mon/OSDMonitor.cc
+++ b/src/mon/OSDMonitor.cc
@@ -3326,34 +3326,38 @@ bool OSDMonitor::prepare_command(MMonCommand *m)
}
}
else if (m->cmd.size() >= 5 && m->cmd[2] == "rmsnap") {
- int64_t pool = osdmap.lookup_pg_pool_name(m->cmd[3].c_str());
+ const string& poolstr = m->cmd[3].c_str();
+ int64_t pool = osdmap.lookup_pg_pool_name(poolstr);
if (pool < 0) {
- ss << "unrecognized pool '" << m->cmd[3] << "'";
+ ss << "unrecognized pool '" << poolstr << "'";
err = -ENOENT;
+ goto out;
+ }
+ const string& snapname = m->cmd[4];
+ const pg_pool_t *p = osdmap.get_pg_pool(pool);
+ if (!p->snap_exists(snapname.c_str())) {
+ ss << "pool " << poolstr << " snap " << snapname << " does not exist";
+ err = 0;
+ goto out;
+ }
+ pg_pool_t *pp = 0;
+ if (pending_inc.new_pools.count(pool))
+ pp = &pending_inc.new_pools[pool];
+ if (!pp) {
+ pp = &pending_inc.new_pools[pool];
+ *pp = *p;
+ }
+ snapid_t sn = pp->snap_exists(snapname.c_str());
+ if (sn) {
+ pp->remove_snap(sn);
+ pp->set_snap_epoch(pending_inc.epoch);
+ ss << "removed pool " << poolstr << " snap " << snapname;
} else {
- const pg_pool_t *p = osdmap.get_pg_pool(pool);
- pg_pool_t *pp = 0;
- if (pending_inc.new_pools.count(pool))
- pp = &pending_inc.new_pools[pool];
- const string& snapname = m->cmd[4];
- if (!p->snap_exists(snapname.c_str()) &&
- (!pp || !pp->snap_exists(snapname.c_str()))) {
- ss << "pool " << m->cmd[3] << " snap " << snapname << " does not exists";
- err = 0;
- } else {
- if (!pp) {
- pp = &pending_inc.new_pools[pool];
- *pp = *p;
- }
- snapid_t sn = pp->snap_exists(snapname.c_str());
- pp->remove_snap(sn);
- pp->set_snap_epoch(pending_inc.epoch);
- ss << "removed pool " << m->cmd[3] << " snap " << snapname;
- getline(ss, rs);
- wait_for_finished_proposal(new Monitor::C_Command(mon, m, 0, rs, get_version()));
- return true;
- }
+ ss << "already removed pool " << poolstr << " snap " << snapname;
}
+ getline(ss, rs);
+ wait_for_finished_proposal(new Monitor::C_Command(mon, m, 0, rs, get_last_committed()));
+ return true;
}
else if (m->cmd[2] == "create" && m->cmd.size() >= 3) {
if (m->cmd.size() < 5) {