diff options
author | Samuel Just <sam.just@inktank.com> | 2013-04-30 15:48:10 -0700 |
---|---|---|
committer | Samuel Just <sam.just@inktank.com> | 2013-05-01 10:43:39 -0700 |
commit | 8a8ae159f5bf3dd663b7524b41b5bad276a4f6de (patch) | |
tree | 610b73a1eb3271e3a74b0bfeb11d194dc3e1ec6e /src/osd/OSD.cc | |
parent | fe68afe9d10bc5d49a05a8bafa644d57783447cf (diff) | |
download | ceph-8a8ae159f5bf3dd663b7524b41b5bad276a4f6de.tar.gz |
OSD: clean up in progress split state on pg removal
There are two cases: 1) the parent pg has not yet initiated the split; 2) the
parent pg has initiated the split.
Previously, in case 1), _remove_pg left the entries for its children in the
in_progress_splits map, blocking subsequent peering attempts.
In case 1), we need to unblock requests on the parent's child pgs when the
parent is removed. We don't need to bother waking requests, since any requests
received prior to the remove_pg request are necessarily obsolete.
In case 2), we don't need to do anything: the child will complete the split on
its own anyway.
Thus, we now track pending_splits separately from in_progress_splits. Children in
pending_splits are in case 1); those in in_progress_splits are in case 2). split_pgs
bumps pgs from pending_splits to in_progress_splits atomically with respect to
_remove_pg, since the parent pg lock is held in both places.
Fixes: #4813
Signed-off-by: Samuel Just <sam.just@inktank.com>
Reviewed-by: Greg Farnum <greg@inktank.com>
Diffstat (limited to 'src/osd/OSD.cc')
-rw-r--r-- | src/osd/OSD.cc | 100 |
1 files changed, 83 insertions, 17 deletions
diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index aa662b59b1a..e63361b8ddd 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -194,45 +194,107 @@ OSDService::OSDService(OSD *osd) : #endif {} -void OSDService::_start_split(const set<pg_t> &pgs) +void OSDService::_start_split(pg_t parent, const set<pg_t> &children) { - for (set<pg_t>::const_iterator i = pgs.begin(); - i != pgs.end(); + for (set<pg_t>::const_iterator i = children.begin(); + i != children.end(); + ++i) { + dout(10) << __func__ << ": Starting split on pg " << *i + << ", parent=" << parent << dendl; + assert(!pending_splits.count(*i)); + assert(!in_progress_splits.count(*i)); + pending_splits.insert(make_pair(*i, parent)); + + assert(!rev_pending_splits[parent].count(*i)); + rev_pending_splits[parent].insert(*i); + } +} + +void OSDService::mark_split_in_progress(pg_t parent, const set<pg_t> &children) +{ + Mutex::Locker l(in_progress_split_lock); + map<pg_t, set<pg_t> >::iterator piter = rev_pending_splits.find(parent); + assert(piter != rev_pending_splits.end()); + for (set<pg_t>::const_iterator i = children.begin(); + i != children.end(); ++i) { - dout(10) << __func__ << ": Starting split on pg " << *i << dendl; + assert(piter->second.count(*i)); + assert(pending_splits.count(*i)); assert(!in_progress_splits.count(*i)); + assert(pending_splits[*i] == parent); + + pending_splits.erase(*i); + piter->second.erase(*i); in_progress_splits.insert(*i); } + if (piter->second.empty()) + rev_pending_splits.erase(piter); +} + +void OSDService::cancel_pending_splits_for_parent(pg_t parent) +{ + Mutex::Locker l(in_progress_split_lock); + map<pg_t, set<pg_t> >::iterator piter = rev_pending_splits.find(parent); + if (piter == rev_pending_splits.end()) + return; + + for (set<pg_t>::iterator i = piter->second.begin(); + i != piter->second.end(); + ++i) { + assert(pending_splits.count(*i)); + assert(!in_progress_splits.count(*i)); + pending_splits.erase(*i); + } + rev_pending_splits.erase(piter); +} + +void 
OSDService::_maybe_split_pgid(OSDMapRef old_map, + OSDMapRef new_map, + pg_t pgid) +{ + assert(old_map->have_pg_pool(pgid.pool())); + if (pgid.ps() < static_cast<unsigned>(old_map->get_pg_num(pgid.pool()))) { + set<pg_t> children; + pgid.is_split(old_map->get_pg_num(pgid.pool()), + new_map->get_pg_num(pgid.pool()), &children); + _start_split(pgid, children); + } else { + assert(pgid.ps() < static_cast<unsigned>(new_map->get_pg_num(pgid.pool()))); + } } void OSDService::expand_pg_num(OSDMapRef old_map, OSDMapRef new_map) { Mutex::Locker l(in_progress_split_lock); - set<pg_t> children; for (set<pg_t>::iterator i = in_progress_splits.begin(); i != in_progress_splits.end(); - ) { - assert(old_map->have_pg_pool(i->pool())); + ) { if (!new_map->have_pg_pool(i->pool())) { in_progress_splits.erase(i++); } else { - if (i->ps() < static_cast<unsigned>(old_map->get_pg_num(i->pool()))) { - i->is_split(old_map->get_pg_num(i->pool()), - new_map->get_pg_num(i->pool()), &children); - } else { - assert(i->ps() < static_cast<unsigned>(new_map->get_pg_num(i->pool()))); - } + _maybe_split_pgid(old_map, new_map, *i); + ++i; + } + } + for (map<pg_t, pg_t>::iterator i = pending_splits.begin(); + i != pending_splits.end(); + ) { + if (!new_map->have_pg_pool(i->first.pool())) { + rev_pending_splits.erase(i->second); + pending_splits.erase(i++); + } else { + _maybe_split_pgid(old_map, new_map, i->first); ++i; } } - _start_split(children); } bool OSDService::splitting(pg_t pgid) { Mutex::Locker l(in_progress_split_lock); - return in_progress_splits.count(pgid); + return in_progress_splits.count(pgid) || + pending_splits.count(pgid); } void OSDService::complete_split(const set<pg_t> &pgs) @@ -242,6 +304,7 @@ void OSDService::complete_split(const set<pg_t> &pgs) i != pgs.end(); ++i) { dout(10) << __func__ << ": Completing split on pg " << *i << dendl; + assert(!pending_splits.count(*i)); assert(in_progress_splits.count(*i)); in_progress_splits.erase(*i); } @@ -1680,7 +1743,7 @@ void 
OSD::load_pgs() pg->info.pgid.is_split(pg->get_osdmap()->get_pg_num(pg->info.pgid.pool()), osdmap->get_pg_num(pg->info.pgid.pool()), &split_pgs)) { - service.start_split(split_pgs); + service.start_split(pg->info.pgid, split_pgs); } pg->reg_next_scrub(); @@ -4385,6 +4448,7 @@ void OSD::advance_pg( lastmap->get_pg_num(pg->pool.id), nextmap->get_pg_num(pg->pool.id), &children)) { + service.mark_split_in_progress(pg->info.pgid, children); split_pgs( pg, children, new_pgs, lastmap, nextmap, rctx); @@ -4507,7 +4571,7 @@ void OSD::consume_map() service.get_osdmap()->get_pg_num(it->first.pool()), osdmap->get_pg_num(it->first.pool()), &split_pgs)) { - service.start_split(split_pgs); + service.start_split(it->first, split_pgs); } pg->unlock(); @@ -5841,6 +5905,8 @@ void OSD::_remove_pg(PG *pg) // and handle_notify_timeout pg->on_removal(rmt); + service.cancel_pending_splits_for_parent(pg->info.pgid); + coll_t to_remove = get_next_removal_coll(pg->info.pgid); removals.push_back(to_remove); rmt->collection_rename(coll_t(pg->info.pgid), to_remove); |