diff options
author | Samuel Just <sam.just@inktank.com> | 2013-04-30 15:48:10 -0700 |
---|---|---|
committer | Samuel Just <sam.just@inktank.com> | 2013-05-01 10:43:39 -0700 |
commit | 8a8ae159f5bf3dd663b7524b41b5bad276a4f6de (patch) | |
tree | 610b73a1eb3271e3a74b0bfeb11d194dc3e1ec6e | |
parent | fe68afe9d10bc5d49a05a8bafa644d57783447cf (diff) | |
download | ceph-8a8ae159f5bf3dd663b7524b41b5bad276a4f6de.tar.gz |
OSD: clean up in-progress split state on pg removal
There are two cases: 1) The parent pg has not yet initiated the split 2) The
parent pg has initiated the split.
Previously in case 1), _remove_pg left the entries for its children in the
in_progress_splits set, blocking subsequent peering attempts.
In case 1), we need to unblock requests on the child pgs when the parent is
removed. We don't need to bother waking those requests, since any request
received prior to the remove_pg request is necessarily obsolete.
In case 2), we don't need to do anything: the child will complete the split on
its own anyway.
Thus, we now track pending_splits vs in_progress_splits. Children in
pending_splits are in case 1); children in in_progress_splits are in case 2).
advance_pg moves pgs from pending_splits to in_progress_splits (via
mark_split_in_progress, immediately before calling split_pgs) atomically with
respect to _remove_pg, since the parent pg lock is held in both places.
Fixes: #4813
Signed-off-by: Samuel Just <sam.just@inktank.com>
Reviewed-by: Greg Farnum <greg@inktank.com>
-rw-r--r-- | src/osd/OSD.cc | 100 | ||||
-rw-r--r-- | src/osd/OSD.h | 16 |
2 files changed, 95 insertions, 21 deletions
diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index aa662b59b1a..e63361b8ddd 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -194,45 +194,107 @@ OSDService::OSDService(OSD *osd) : #endif {} -void OSDService::_start_split(const set<pg_t> &pgs) +void OSDService::_start_split(pg_t parent, const set<pg_t> &children) { - for (set<pg_t>::const_iterator i = pgs.begin(); - i != pgs.end(); + for (set<pg_t>::const_iterator i = children.begin(); + i != children.end(); + ++i) { + dout(10) << __func__ << ": Starting split on pg " << *i + << ", parent=" << parent << dendl; + assert(!pending_splits.count(*i)); + assert(!in_progress_splits.count(*i)); + pending_splits.insert(make_pair(*i, parent)); + + assert(!rev_pending_splits[parent].count(*i)); + rev_pending_splits[parent].insert(*i); + } +} + +void OSDService::mark_split_in_progress(pg_t parent, const set<pg_t> &children) +{ + Mutex::Locker l(in_progress_split_lock); + map<pg_t, set<pg_t> >::iterator piter = rev_pending_splits.find(parent); + assert(piter != rev_pending_splits.end()); + for (set<pg_t>::const_iterator i = children.begin(); + i != children.end(); ++i) { - dout(10) << __func__ << ": Starting split on pg " << *i << dendl; + assert(piter->second.count(*i)); + assert(pending_splits.count(*i)); assert(!in_progress_splits.count(*i)); + assert(pending_splits[*i] == parent); + + pending_splits.erase(*i); + piter->second.erase(*i); in_progress_splits.insert(*i); } + if (piter->second.empty()) + rev_pending_splits.erase(piter); +} + +void OSDService::cancel_pending_splits_for_parent(pg_t parent) +{ + Mutex::Locker l(in_progress_split_lock); + map<pg_t, set<pg_t> >::iterator piter = rev_pending_splits.find(parent); + if (piter == rev_pending_splits.end()) + return; + + for (set<pg_t>::iterator i = piter->second.begin(); + i != piter->second.end(); + ++i) { + assert(pending_splits.count(*i)); + assert(!in_progress_splits.count(*i)); + pending_splits.erase(*i); + } + rev_pending_splits.erase(piter); +} + +void 
OSDService::_maybe_split_pgid(OSDMapRef old_map, + OSDMapRef new_map, + pg_t pgid) +{ + assert(old_map->have_pg_pool(pgid.pool())); + if (pgid.ps() < static_cast<unsigned>(old_map->get_pg_num(pgid.pool()))) { + set<pg_t> children; + pgid.is_split(old_map->get_pg_num(pgid.pool()), + new_map->get_pg_num(pgid.pool()), &children); + _start_split(pgid, children); + } else { + assert(pgid.ps() < static_cast<unsigned>(new_map->get_pg_num(pgid.pool()))); + } } void OSDService::expand_pg_num(OSDMapRef old_map, OSDMapRef new_map) { Mutex::Locker l(in_progress_split_lock); - set<pg_t> children; for (set<pg_t>::iterator i = in_progress_splits.begin(); i != in_progress_splits.end(); - ) { - assert(old_map->have_pg_pool(i->pool())); + ) { if (!new_map->have_pg_pool(i->pool())) { in_progress_splits.erase(i++); } else { - if (i->ps() < static_cast<unsigned>(old_map->get_pg_num(i->pool()))) { - i->is_split(old_map->get_pg_num(i->pool()), - new_map->get_pg_num(i->pool()), &children); - } else { - assert(i->ps() < static_cast<unsigned>(new_map->get_pg_num(i->pool()))); - } + _maybe_split_pgid(old_map, new_map, *i); + ++i; + } + } + for (map<pg_t, pg_t>::iterator i = pending_splits.begin(); + i != pending_splits.end(); + ) { + if (!new_map->have_pg_pool(i->first.pool())) { + rev_pending_splits.erase(i->second); + pending_splits.erase(i++); + } else { + _maybe_split_pgid(old_map, new_map, i->first); ++i; } } - _start_split(children); } bool OSDService::splitting(pg_t pgid) { Mutex::Locker l(in_progress_split_lock); - return in_progress_splits.count(pgid); + return in_progress_splits.count(pgid) || + pending_splits.count(pgid); } void OSDService::complete_split(const set<pg_t> &pgs) @@ -242,6 +304,7 @@ void OSDService::complete_split(const set<pg_t> &pgs) i != pgs.end(); ++i) { dout(10) << __func__ << ": Completing split on pg " << *i << dendl; + assert(!pending_splits.count(*i)); assert(in_progress_splits.count(*i)); in_progress_splits.erase(*i); } @@ -1680,7 +1743,7 @@ void 
OSD::load_pgs() pg->info.pgid.is_split(pg->get_osdmap()->get_pg_num(pg->info.pgid.pool()), osdmap->get_pg_num(pg->info.pgid.pool()), &split_pgs)) { - service.start_split(split_pgs); + service.start_split(pg->info.pgid, split_pgs); } pg->reg_next_scrub(); @@ -4385,6 +4448,7 @@ void OSD::advance_pg( lastmap->get_pg_num(pg->pool.id), nextmap->get_pg_num(pg->pool.id), &children)) { + service.mark_split_in_progress(pg->info.pgid, children); split_pgs( pg, children, new_pgs, lastmap, nextmap, rctx); @@ -4507,7 +4571,7 @@ void OSD::consume_map() service.get_osdmap()->get_pg_num(it->first.pool()), osdmap->get_pg_num(it->first.pool()), &split_pgs)) { - service.start_split(split_pgs); + service.start_split(it->first, split_pgs); } pg->unlock(); @@ -5841,6 +5905,8 @@ void OSD::_remove_pg(PG *pg) // and handle_notify_timeout pg->on_removal(rmt); + service.cancel_pending_splits_for_parent(pg->info.pgid); + coll_t to_remove = get_next_removal_coll(pg->info.pgid); removals.push_back(to_remove); rmt->collection_rename(coll_t(pg->info.pgid), to_remove); diff --git a/src/osd/OSD.h b/src/osd/OSD.h index 513bd43ec6c..f894768fbe5 100644 --- a/src/osd/OSD.h +++ b/src/osd/OSD.h @@ -386,16 +386,24 @@ public: // split Mutex in_progress_split_lock; - set<pg_t> in_progress_splits; - void _start_split(const set<pg_t> &pgs); - void start_split(const set<pg_t> &pgs) { + map<pg_t, pg_t> pending_splits; // child -> parent + map<pg_t, set<pg_t> > rev_pending_splits; // parent -> [children] + set<pg_t> in_progress_splits; // child + + void _start_split(pg_t parent, const set<pg_t> &children); + void start_split(pg_t parent, const set<pg_t> &children) { Mutex::Locker l(in_progress_split_lock); - return _start_split(pgs); + return _start_split(parent, children); } + void mark_split_in_progress(pg_t parent, const set<pg_t> &pgs); void complete_split(const set<pg_t> &pgs); + void cancel_pending_splits_for_parent(pg_t parent); bool splitting(pg_t pgid); void expand_pg_num(OSDMapRef old_map, OSDMapRef 
new_map); + void _maybe_split_pgid(OSDMapRef old_map, + OSDMapRef new_map, + pg_t pgid); // -- OSD Full Status -- Mutex full_status_lock; |