summary | refs | log | tree | commit | diff
path: root/src/osd/OSD.cc
diff options
context:
space:
mode:
author Samuel Just <sam.just@inktank.com> 2013-04-30 15:48:10 -0700
committer Samuel Just <sam.just@inktank.com> 2013-05-01 10:43:39 -0700
commit 8a8ae159f5bf3dd663b7524b41b5bad276a4f6de (patch)
tree 610b73a1eb3271e3a74b0bfeb11d194dc3e1ec6e /src/osd/OSD.cc
parent fe68afe9d10bc5d49a05a8bafa644d57783447cf (diff)
download ceph-8a8ae159f5bf3dd663b7524b41b5bad276a4f6de.tar.gz
OSD: clean up in progress split state on pg removal
There are two cases: 1) the parent pg has not yet initiated the split; 2) the parent pg has initiated the split. Previously, in case 1), _remove_pg left the entries for its children in the in_progress_splits map, blocking subsequent peering attempts. In case 1), we need to unblock requests on the child pgs when the parent is removed. We don't need to bother waking requests, since any requests received prior to the remove_pg request are necessarily obsolete. In case 2), we don't need to do anything: the child will complete the split on its own anyway. Thus, we now track pending_splits separately from in_progress_splits: children in pending_splits are in state 1), and children in in_progress_splits are in state 2). split_pgs bumps pgs from pending_splits to in_progress_splits atomically with respect to _remove_pg, since the parent pg lock is held in both places. Fixes: #4813 Signed-off-by: Samuel Just <sam.just@inktank.com> Reviewed-by: Greg Farnum <greg@inktank.com>
Diffstat (limited to 'src/osd/OSD.cc')
-rw-r--r-- src/osd/OSD.cc 100
1 files changed, 83 insertions, 17 deletions
diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc
index aa662b59b1a..e63361b8ddd 100644
--- a/src/osd/OSD.cc
+++ b/src/osd/OSD.cc
@@ -194,45 +194,107 @@ OSDService::OSDService(OSD *osd) :
#endif
{}
-void OSDService::_start_split(const set<pg_t> &pgs)
+void OSDService::_start_split(pg_t parent, const set<pg_t> &children)
{
- for (set<pg_t>::const_iterator i = pgs.begin();
- i != pgs.end();
+ for (set<pg_t>::const_iterator i = children.begin();
+ i != children.end();
+ ++i) {
+ dout(10) << __func__ << ": Starting split on pg " << *i
+ << ", parent=" << parent << dendl;
+ assert(!pending_splits.count(*i));
+ assert(!in_progress_splits.count(*i));
+ pending_splits.insert(make_pair(*i, parent));
+
+ assert(!rev_pending_splits[parent].count(*i));
+ rev_pending_splits[parent].insert(*i);
+ }
+}
+
+void OSDService::mark_split_in_progress(pg_t parent, const set<pg_t> &children)
+{
+ Mutex::Locker l(in_progress_split_lock);
+ map<pg_t, set<pg_t> >::iterator piter = rev_pending_splits.find(parent);
+ assert(piter != rev_pending_splits.end());
+ for (set<pg_t>::const_iterator i = children.begin();
+ i != children.end();
++i) {
- dout(10) << __func__ << ": Starting split on pg " << *i << dendl;
+ assert(piter->second.count(*i));
+ assert(pending_splits.count(*i));
assert(!in_progress_splits.count(*i));
+ assert(pending_splits[*i] == parent);
+
+ pending_splits.erase(*i);
+ piter->second.erase(*i);
in_progress_splits.insert(*i);
}
+ if (piter->second.empty())
+ rev_pending_splits.erase(piter);
+}
+
+void OSDService::cancel_pending_splits_for_parent(pg_t parent)
+{
+ Mutex::Locker l(in_progress_split_lock);
+ map<pg_t, set<pg_t> >::iterator piter = rev_pending_splits.find(parent);
+ if (piter == rev_pending_splits.end())
+ return;
+
+ for (set<pg_t>::iterator i = piter->second.begin();
+ i != piter->second.end();
+ ++i) {
+ assert(pending_splits.count(*i));
+ assert(!in_progress_splits.count(*i));
+ pending_splits.erase(*i);
+ }
+ rev_pending_splits.erase(piter);
+}
+
+void OSDService::_maybe_split_pgid(OSDMapRef old_map,
+ OSDMapRef new_map,
+ pg_t pgid)
+{
+ assert(old_map->have_pg_pool(pgid.pool()));
+ if (pgid.ps() < static_cast<unsigned>(old_map->get_pg_num(pgid.pool()))) {
+ set<pg_t> children;
+ pgid.is_split(old_map->get_pg_num(pgid.pool()),
+ new_map->get_pg_num(pgid.pool()), &children);
+ _start_split(pgid, children);
+ } else {
+ assert(pgid.ps() < static_cast<unsigned>(new_map->get_pg_num(pgid.pool())));
+ }
}
void OSDService::expand_pg_num(OSDMapRef old_map,
OSDMapRef new_map)
{
Mutex::Locker l(in_progress_split_lock);
- set<pg_t> children;
for (set<pg_t>::iterator i = in_progress_splits.begin();
i != in_progress_splits.end();
- ) {
- assert(old_map->have_pg_pool(i->pool()));
+ ) {
if (!new_map->have_pg_pool(i->pool())) {
in_progress_splits.erase(i++);
} else {
- if (i->ps() < static_cast<unsigned>(old_map->get_pg_num(i->pool()))) {
- i->is_split(old_map->get_pg_num(i->pool()),
- new_map->get_pg_num(i->pool()), &children);
- } else {
- assert(i->ps() < static_cast<unsigned>(new_map->get_pg_num(i->pool())));
- }
+ _maybe_split_pgid(old_map, new_map, *i);
+ ++i;
+ }
+ }
+ for (map<pg_t, pg_t>::iterator i = pending_splits.begin();
+ i != pending_splits.end();
+ ) {
+ if (!new_map->have_pg_pool(i->first.pool())) {
+ rev_pending_splits.erase(i->second);
+ pending_splits.erase(i++);
+ } else {
+ _maybe_split_pgid(old_map, new_map, i->first);
++i;
}
}
- _start_split(children);
}
bool OSDService::splitting(pg_t pgid)
{
Mutex::Locker l(in_progress_split_lock);
- return in_progress_splits.count(pgid);
+ return in_progress_splits.count(pgid) ||
+ pending_splits.count(pgid);
}
void OSDService::complete_split(const set<pg_t> &pgs)
@@ -242,6 +304,7 @@ void OSDService::complete_split(const set<pg_t> &pgs)
i != pgs.end();
++i) {
dout(10) << __func__ << ": Completing split on pg " << *i << dendl;
+ assert(!pending_splits.count(*i));
assert(in_progress_splits.count(*i));
in_progress_splits.erase(*i);
}
@@ -1680,7 +1743,7 @@ void OSD::load_pgs()
pg->info.pgid.is_split(pg->get_osdmap()->get_pg_num(pg->info.pgid.pool()),
osdmap->get_pg_num(pg->info.pgid.pool()),
&split_pgs)) {
- service.start_split(split_pgs);
+ service.start_split(pg->info.pgid, split_pgs);
}
pg->reg_next_scrub();
@@ -4385,6 +4448,7 @@ void OSD::advance_pg(
lastmap->get_pg_num(pg->pool.id),
nextmap->get_pg_num(pg->pool.id),
&children)) {
+ service.mark_split_in_progress(pg->info.pgid, children);
split_pgs(
pg, children, new_pgs, lastmap, nextmap,
rctx);
@@ -4507,7 +4571,7 @@ void OSD::consume_map()
service.get_osdmap()->get_pg_num(it->first.pool()),
osdmap->get_pg_num(it->first.pool()),
&split_pgs)) {
- service.start_split(split_pgs);
+ service.start_split(it->first, split_pgs);
}
pg->unlock();
@@ -5841,6 +5905,8 @@ void OSD::_remove_pg(PG *pg)
// and handle_notify_timeout
pg->on_removal(rmt);
+ service.cancel_pending_splits_for_parent(pg->info.pgid);
+
coll_t to_remove = get_next_removal_coll(pg->info.pgid);
removals.push_back(to_remove);
rmt->collection_rename(coll_t(pg->info.pgid), to_remove);