summary refs log tree commit diff
diff options
context:
space:
mode:
author David Zafman <david.zafman@inktank.com> 2013-10-11 15:53:49 -0700
committer David Zafman <david.zafman@inktank.com> 2013-10-22 10:10:18 -0700
commit 5296752193159b8f2b783741b13ac83070ea895a (patch)
tree 6c3c306045cb01b22e432e3d453831c7c4e34a2c
parent bed222280647ab8cd9a3a419814886a4fe604264 (diff)
download ceph-5296752193159b8f2b783741b13ac83070ea895a.tar.gz
Backfill peers should not be included in the acting set
Create actingbackfill in choose_acting(). Use first backfill target as previously. Add asserts to catch inappropriate use of actingbackfill. fixes: #5855. Signed-off-by: David Zafman <david.zafman@inktank.com>
-rw-r--r-- src/osd/OSD.cc 17
-rw-r--r-- src/osd/PG.cc 307
-rw-r--r-- src/osd/PG.h 35
-rw-r--r-- src/osd/PGBackend.h 1
-rw-r--r-- src/osd/ReplicatedPG.cc 92
-rw-r--r-- src/osd/ReplicatedPG.h 6
6 files changed, 276 insertions, 182 deletions
diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc
index 7695d10b8e7..81a33a72788 100644
--- a/src/osd/OSD.cc
+++ b/src/osd/OSD.cc
@@ -2036,8 +2036,8 @@ void OSD::load_pgs()
pg->reg_next_scrub();
// generate state for PG's current mapping
- pg->get_osdmap()->pg_to_up_acting_osds(pgid, pg->up, pg->acting);
- int role = pg->get_osdmap()->calc_pg_role(whoami, pg->acting);
+ pg->get_osdmap()->pg_to_up_acting_osds(pgid, pg->up, pg->actingonly);
+ int role = pg->get_osdmap()->calc_pg_role(whoami, pg->actingonly);
pg->set_role(role);
PG::RecoveryCtx rctx(0, 0, 0, 0, 0, 0);
@@ -2280,7 +2280,7 @@ void OSD::handle_pg_peering_evt(
true,
old_pg_state->role,
old_pg_state->up,
- old_pg_state->acting,
+ old_pg_state->actingonly,
old_pg_state->info.history,
old_pg_state->past_intervals,
*rctx.transaction);
@@ -2309,7 +2309,7 @@ void OSD::handle_pg_peering_evt(
true,
old_pg_state->role,
old_pg_state->up,
- old_pg_state->acting,
+ old_pg_state->actingonly,
old_pg_state->info.history,
old_pg_state->past_intervals,
*rctx.transaction
@@ -6016,8 +6016,11 @@ void OSD::dispatch_context_transaction(PG::RecoveryCtx &ctx, PG *pg)
bool OSD::compat_must_dispatch_immediately(PG *pg)
{
assert(pg->is_locked());
- for (vector<int>::iterator i = pg->acting.begin();
- i != pg->acting.end();
+ vector<int> *tmpacting = &pg->actingonly;
+ if (pg->actingbackfill.size() > 0)
+ tmpacting = &pg->actingbackfill;
+ for (vector<int>::iterator i = tmpacting->begin();
+ i != tmpacting->end();
++i) {
if (*i == whoami)
continue;
@@ -6878,7 +6881,7 @@ void OSDService::handle_misdirected_op(PG *pg, OpRequestRef op)
clog.warn() << m->get_source_inst() << " misdirected " << m->get_reqid()
<< " pg " << m->get_pg()
<< " to osd." << whoami
- << " not " << pg->acting
+ << " not " << pg->actingonly
<< " in e" << m->get_map_epoch() << "/" << osdmap->get_epoch() << "\n";
reply_op_error(op, -ENXIO);
}
diff --git a/src/osd/PG.cc b/src/osd/PG.cc
index 8ba5b6cc2ff..e745a3f5f01 100644
--- a/src/osd/PG.cc
+++ b/src/osd/PG.cc
@@ -170,7 +170,6 @@ PG::PG(OSDService *o, OSDMapRef curmap,
need_flush(false),
last_peering_reset(0),
heartbeat_peer_lock("PG::heartbeat_peer_lock"),
- backfill_target(-1),
backfill_reserved(0),
backfill_reserving(0),
flushed(false),
@@ -288,7 +287,7 @@ bool PG::proc_replica_info(int from, const pg_info_t &oinfo)
reg_next_scrub();
// stray?
- if (!is_acting(from)) {
+ if (!is_actingbackfill(from)) {
dout(10) << " osd." << from << " has stray content: " << oinfo << dendl;
stray_set.insert(from);
if (is_clean()) {
@@ -484,8 +483,9 @@ bool PG::needs_recovery() const
ret = true;
}
- vector<int>::const_iterator end = acting.end();
- vector<int>::const_iterator a = acting.begin();
+ assert(actingbackfill.size() > 0);
+ vector<int>::const_iterator end = actingbackfill.end();
+ vector<int>::const_iterator a = actingbackfill.begin();
assert(a != end);
++a;
for (; a != end; ++a) {
@@ -513,10 +513,10 @@ bool PG::needs_backfill() const
bool ret = false;
- vector<int>::const_iterator end = acting.end();
- vector<int>::const_iterator a = acting.begin();
- assert(a != end);
- ++a;
+ //We can assume that the only osds that may need backfill
+ //are those in the backfill_targets vector.
+ vector<int>::const_iterator end = backfill_targets.end();
+ vector<int>::const_iterator a = backfill_targets.begin();
for (; a != end; ++a) {
int peer = *a;
map<int,pg_info_t>::const_iterator pi = peer_info.find(peer);
@@ -699,7 +699,7 @@ void PG::build_prior(std::auto_ptr<PriorSet> &prior_set)
prior_set.reset(new PriorSet(*get_osdmap(),
past_intervals,
up,
- acting,
+ actingonly,
info,
this));
PriorSet &prior(*prior_set.get());
@@ -827,7 +827,8 @@ map<int, pg_info_t>::const_iterator PG::find_best_info(const map<int, pg_info_t>
}
}
// prefer current primary (usually the caller), all things being equal
- if (p->first == acting[0]) {
+ //rename actingonly -> acting
+ if (p->first == actingonly[0]) {
dout(10) << "calc_acting prefer osd." << p->first
<< " because it is current primary" << dendl;
best = p;
@@ -844,7 +845,7 @@ map<int, pg_info_t>::const_iterator PG::find_best_info(const map<int, pg_info_t>
* incomplete, or another osd has a longer tail that allows us to
* bring other up nodes up to date.
*/
-bool PG::calc_acting(int& newest_update_osd_id, vector<int>& want) const
+bool PG::calc_acting(int& newest_update_osd_id, vector<int>& want, vector<int>& backfill) const
{
map<int, pg_info_t> all_info(peer_info.begin(), peer_info.end());
all_info[osd->whoami] = info;
@@ -856,7 +857,8 @@ bool PG::calc_acting(int& newest_update_osd_id, vector<int>& want) const
map<int, pg_info_t>::const_iterator newest_update_osd = find_best_info(all_info);
if (newest_update_osd == all_info.end()) {
- if (up != acting) {
+ //Is actingbackfill even set in this case?
+ if (up != actingonly) {
dout(10) << "calc_acting no suitable info found (incomplete backfills?), reverting to up" << dendl;
want = up;
return true;
@@ -908,7 +910,6 @@ bool PG::calc_acting(int& newest_update_osd_id, vector<int>& want) const
<< " with " << primary->second << dendl;
want.push_back(primary->first);
unsigned usable = 1;
- unsigned backfill = 0;
// select replicas that have log contiguity with primary.
// prefer up, then acting, then any peer_info osds
@@ -919,13 +920,8 @@ bool PG::calc_acting(int& newest_update_osd_id, vector<int>& want) const
continue;
const pg_info_t &cur_info = all_info.find(*i)->second;
if (cur_info.is_incomplete() || cur_info.last_update < primary->second.log_tail) {
- if (backfill < 1) {
- dout(10) << " osd." << *i << " (up) accepted (backfill) " << cur_info << dendl;
- want.push_back(*i);
- backfill++;
- } else {
- dout(10) << " osd." << *i << " (up) rejected" << cur_info << dendl;
- }
+ dout(10) << " osd." << *i << " (up) backfill " << cur_info << dendl;
+ backfill.push_back(*i);
} else {
want.push_back(*i);
usable++;
@@ -933,8 +929,10 @@ bool PG::calc_acting(int& newest_update_osd_id, vector<int>& want) const
}
}
- for (vector<int>::const_iterator i = acting.begin();
- i != acting.end();
+ //XXX: This no longer gets backfill OSDs, so I assume it is
+ //covered by the up OSDs.
+ for (vector<int>::const_iterator i = actingonly.begin();
+ i != actingonly.end();
++i) {
if (usable >= get_osdmap()->get_pg_size(info.pgid))
break;
@@ -956,6 +954,10 @@ bool PG::calc_acting(int& newest_update_osd_id, vector<int>& want) const
}
}
+ //XXX: Should we go through backfill_targets here? That hasn't been
+ //propagated through the mons.
+ //Ignore up and acting from above
+
for (map<int,pg_info_t>::const_iterator i = all_info.begin();
i != all_info.end();
++i) {
@@ -968,9 +970,10 @@ bool PG::calc_acting(int& newest_update_osd_id, vector<int>& want) const
vector<int>::const_iterator up_it = find(up.begin(), up.end(), i->first);
if (up_it != up.end())
continue;
- vector<int>::const_iterator acting_it = find(acting.begin(), acting.end(), i->first);
- if (acting_it != acting.end())
+ vector<int>::const_iterator acting_it = find(actingonly.begin(), actingonly.end(), i->first);
+ if (acting_it != actingonly.end())
continue;
+ //XXX: Ignore backfill_targets items
if (i->second.is_incomplete() || i->second.last_update < primary->second.log_tail) {
dout(10) << " osd." << i->first << " (stray) REJECTED " << i->second << dendl;
@@ -992,33 +995,53 @@ bool PG::calc_acting(int& newest_update_osd_id, vector<int>& want) const
*/
bool PG::choose_acting(int& newest_update_osd)
{
- vector<int> want;
+ vector<int> want, backfill;
- if (!calc_acting(newest_update_osd, want)) {
+ if (!calc_acting(newest_update_osd, want, backfill)) {
dout(10) << "choose_acting failed" << dendl;
assert(want_acting.empty());
return false;
}
- if (want.size() < pool.info.min_size) {
+ //For now we only backfill 1 at a time as before
+ if (!backfill.empty())
+ backfill.resize(1);
+
+ //This might cause a problem if min_size is large
+ //and we need to backfill more than 1 osd. Older
+ //code would only include 1 backfill osd and now we
+ //have the resize above.
+ if (want.size() + backfill.size() < pool.info.min_size) {
want_acting.clear();
return false;
}
- if (want != acting) {
- dout(10) << "choose_acting want " << want << " != acting " << acting
+ if (want != actingonly) {
+ dout(10) << "choose_acting want " << want << " != acting " << actingonly
<< ", requesting pg_temp change" << dendl;
want_acting = want;
+
if (want == up) {
+ //There can't be any pending backfill if
+ //want is the same as crush map up OSDs.
+ assert(backfill.empty());
vector<int> empty;
osd->queue_want_pg_temp(info.pgid, empty);
} else
osd->queue_want_pg_temp(info.pgid, want);
return false;
- } else {
- want_acting.clear();
}
- dout(10) << "choose_acting want " << want << " (== acting)" << dendl;
+ want_acting.clear();
+ //We can only get here when new interval has arrived and
+ //we've accepted the acting set. Now we can create
+ //actingbackfill and backfill_targets vectors.
+ assert(actingbackfill.size() == 0);
+ assert(backfill_targets.size() == 0);
+ actingbackfill = actingonly;
+ actingbackfill.insert(actingbackfill.end(), backfill.begin(), backfill.end());
+ backfill_targets = backfill;
+ dout(10) << "choose_acting want " << want << " (== acting) backfill_targets "
+ << backfill << dendl;
return true;
}
@@ -1182,8 +1205,9 @@ void PG::activate(ObjectStore::Transaction& t,
// count replicas that are not backfilling
unsigned active = 1;
- for (unsigned i=1; i<acting.size(); i++) {
- int peer = acting[i];
+ assert(actingbackfill.size() > 0);
+ for (unsigned i=1; i<actingbackfill.size(); i++) {
+ int peer = actingbackfill[i];
assert(peer_info.count(peer));
pg_info_t& pi = peer_info[peer];
@@ -1273,8 +1297,10 @@ void PG::activate(ObjectStore::Transaction& t,
}
}
+ assert(active == actingonly.size());
+
// degraded?
- if (get_osdmap()->get_pg_size(info.pgid) > active)
+ if (get_osdmap()->get_pg_size(info.pgid) > actingonly.size())
state_set(PG_STATE_DEGRADED);
// all clean?
@@ -1437,7 +1463,8 @@ void PG::_activate_committed(epoch_t e)
dout(10) << "_activate_committed " << e << " peer_activated now " << peer_activated
<< " last_epoch_started " << info.history.last_epoch_started
<< " same_interval_since " << info.history.same_interval_since << dendl;
- if (peer_activated.size() == acting.size())
+ assert(actingbackfill.size() > 0);
+ if (peer_activated.size() == actingbackfill.size())
all_activated_and_committed();
} else {
dout(10) << "_activate_committed " << e << " telling primary" << dendl;
@@ -1447,7 +1474,7 @@ void PG::_activate_committed(epoch_t e)
info);
i.info.history.last_epoch_started = e;
m->pg_list.push_back(make_pair(i, pg_interval_map_t()));
- osd->send_message_osd_cluster(acting[0], m, get_osdmap()->get_epoch());
+ osd->send_message_osd_cluster(actingonly[0], m, get_osdmap()->get_epoch());
}
if (dirty_info) {
@@ -1469,7 +1496,8 @@ void PG::all_activated_and_committed()
{
dout(10) << "all_activated_and_committed" << dendl;
assert(is_primary());
- assert(peer_activated.size() == acting.size());
+ assert(peer_activated.size() == actingbackfill.size());
+ assert(actingbackfill.size() > 0);
// info.last_epoch_started is set during activate()
info.history.last_epoch_started = info.last_epoch_started;
@@ -1525,8 +1553,8 @@ void PG::mark_clean()
{
// only mark CLEAN if we have the desired number of replicas AND we
// are not remapped.
- if (acting.size() == get_osdmap()->get_pg_size(info.pgid) &&
- up == acting)
+ if (actingonly.size() == get_osdmap()->get_pg_size(info.pgid) &&
+ up == actingonly)
state_set(PG_STATE_CLEAN);
// NOTE: this is actually a bit premature: we haven't purged the
@@ -1704,8 +1732,10 @@ void PG::split_into(pg_t child_pgid, PG *child, unsigned split_bits)
child->snap_trimq = snap_trimq;
- get_osdmap()->pg_to_up_acting_osds(child->info.pgid, child->up, child->acting);
- child->role = get_osdmap()->calc_pg_role(osd->whoami, child->acting);
+ //What to do during split?
+ //XXX: Can there be backfill going on or not?
+ get_osdmap()->pg_to_up_acting_osds(child->info.pgid, child->up, child->actingonly);
+ child->role = get_osdmap()->calc_pg_role(osd->whoami, child->actingonly);
if (get_primary() != child->get_primary())
child->info.history.same_primary_since = get_osdmap()->get_epoch();
@@ -1736,7 +1766,7 @@ void PG::clear_recovery_state()
finish_recovery_op(soid, true);
}
- backfill_target = -1;
+ backfill_targets.clear();
backfill_info.clear();
peer_backfill_info.clear();
waiting_on_backfill = false;
@@ -1802,11 +1832,12 @@ void PG::clear_probe_targets()
void PG::update_heartbeat_peers()
{
assert(is_locked());
+ vector <int> hbpeers;
set<int> new_peers;
if (role == 0) {
- for (unsigned i=0; i<acting.size(); i++)
- new_peers.insert(acting[i]);
+ for (unsigned i=0; i<actingonly.size(); i++)
+ new_peers.insert(actingonly[i]);
for (unsigned i=0; i<up.size(); i++)
new_peers.insert(up[i]);
for (map<int,pg_info_t>::iterator p = peer_info.begin(); p != peer_info.end(); ++p)
@@ -1875,7 +1906,7 @@ void PG::publish_stats_to_osd()
pg_stats_publish.stats.add(unstable_stats);
// calc copies, degraded
- unsigned target = MAX(get_osdmap()->get_pg_size(info.pgid), acting.size());
+ unsigned target = MAX(get_osdmap()->get_pg_size(info.pgid), actingbackfill.size());
pg_stats_publish.stats.calc_copies(target);
pg_stats_publish.stats.sum.num_objects_degraded = 0;
if ((is_degraded() || !is_clean()) && is_active()) {
@@ -1885,23 +1916,24 @@ void PG::publish_stats_to_osd()
uint64_t degraded = 0;
- // if the acting set is smaller than we want, add in those missing replicas
- if (acting.size() < target)
- degraded += (target - acting.size()) * num_objects;
+ // if the actingbackfill set is smaller than we want, add in those missing replicas
+ if (actingbackfill.size() < target)
+ degraded += (target - actingbackfill.size()) * num_objects;
// missing on primary
pg_stats_publish.stats.sum.num_objects_missing_on_primary =
pg_log.get_missing().num_missing();
degraded += pg_log.get_missing().num_missing();
- for (unsigned i=1; i<acting.size(); i++) {
- assert(peer_missing.count(acting[i]));
+ assert(actingbackfill.size() > 0);
+ for (unsigned i=1; i<actingbackfill.size(); i++) {
+ assert(peer_missing.count(actingbackfill[i]));
// in missing set
- degraded += peer_missing[acting[i]].num_missing();
+ degraded += peer_missing[actingbackfill[i]].num_missing();
// not yet backfilled
- degraded += num_objects - peer_info[acting[i]].stats.stats.sum.num_objects;
+ degraded += num_objects - peer_info[actingbackfill[i]].stats.stats.sum.num_objects;
}
pg_stats_publish.stats.sum.num_objects_degraded = degraded;
pg_stats_publish.stats.sum.num_objects_unfound = get_num_unfound();
@@ -1955,14 +1987,14 @@ void PG::init(int role, vector<int>& newup, vector<int>& newacting,
<< dendl;
set_role(role);
- acting = newacting;
+ actingonly = newacting;
up = newup;
info.history = history;
past_intervals.swap(pi);
info.stats.up = up;
- info.stats.acting = acting;
+ info.stats.acting = actingonly;
info.stats.mapping_epoch = info.history.same_interval_since;
if (backfill) {
@@ -2213,11 +2245,13 @@ void PG::write_if_dirty(ObjectStore::Transaction& t)
void PG::trim_peers()
{
+ assert(is_primary());
calc_trim_to();
dout(10) << "trim_peers " << pg_trim_to << dendl;
if (pg_trim_to != eversion_t()) {
- for (unsigned i=1; i<acting.size(); i++)
- osd->send_message_osd_cluster(acting[i],
+ assert(actingbackfill.size() > 0);
+ for (unsigned i=1; i<actingbackfill.size(); i++)
+ osd->send_message_osd_cluster(actingbackfill[i],
new MOSDPGTrim(get_osdmap()->get_epoch(), info.pgid,
pg_trim_to),
get_osdmap()->get_epoch());
@@ -2590,7 +2624,7 @@ bool PG::sched_scrub()
clear_scrub_reserved();
scrub_unreserve_replicas();
ret = false;
- } else if (scrubber.reserved_peers.size() == acting.size()) {
+ } else if (scrubber.reserved_peers.size() == actingonly.size()) {
dout(20) << "sched_scrub: success, reserved self and replicas" << dendl;
if (time_for_deep) {
dout(10) << "sched_scrub: scrub will be deep" << dendl;
@@ -2888,7 +2922,7 @@ void PG::sub_op_scrub_stop(OpRequestRef op)
void PG::reject_reservation()
{
osd->send_message_osd_cluster(
- acting[0],
+ actingonly[0],
new MBackfillReserve(
MBackfillReserve::REJECT,
info.pgid,
@@ -2920,8 +2954,10 @@ void PG::clear_scrub_reserved()
void PG::scrub_reserve_replicas()
{
- for (unsigned i=1; i<acting.size(); i++) {
- dout(10) << "scrub requesting reserve from osd." << acting[i] << dendl;
+ //XXX: Don't see how we don't scrub during backfill
+ assert(backfill_targets.empty());
+ for (unsigned i=1; i<actingonly.size(); i++) {
+ dout(10) << "scrub requesting reserve from osd." << actingonly[i] << dendl;
vector<OSDOp> scrub(1);
scrub[0].op.op = CEPH_OSD_OP_SCRUB_RESERVE;
hobject_t poid;
@@ -2930,14 +2966,16 @@ void PG::scrub_reserve_replicas()
MOSDSubOp *subop = new MOSDSubOp(reqid, info.pgid, poid, false, 0,
get_osdmap()->get_epoch(), osd->get_tid(), v);
subop->ops = scrub;
- osd->send_message_osd_cluster(acting[i], subop, get_osdmap()->get_epoch());
+ osd->send_message_osd_cluster(actingonly[i], subop, get_osdmap()->get_epoch());
}
}
void PG::scrub_unreserve_replicas()
{
- for (unsigned i=1; i<acting.size(); i++) {
- dout(10) << "scrub requesting unreserve from osd." << acting[i] << dendl;
+ //XXX: Don't see how we don't scrub during backfill
+ assert(backfill_targets.empty());
+ for (unsigned i=1; i<actingonly.size(); i++) {
+ dout(10) << "scrub requesting unreserve from osd." << actingonly[i] << dendl;
vector<OSDOp> scrub(1);
scrub[0].op.op = CEPH_OSD_OP_SCRUB_UNRESERVE;
hobject_t poid;
@@ -2946,7 +2984,7 @@ void PG::scrub_unreserve_replicas()
MOSDSubOp *subop = new MOSDSubOp(reqid, info.pgid, poid, false, 0,
get_osdmap()->get_epoch(), osd->get_tid(), v);
subop->ops = scrub;
- osd->send_message_osd_cluster(acting[i], subop, get_osdmap()->get_epoch());
+ osd->send_message_osd_cluster(actingonly[i], subop, get_osdmap()->get_epoch());
}
}
@@ -3135,7 +3173,7 @@ void PG::repair_object(const hobject_t& soid, ScrubMap::object *po, int bad_peer
bufferlist bv;
bv.push_back(po->attrs[OI_ATTR]);
object_info_t oi(bv);
- if (bad_peer != acting[0]) {
+ if (bad_peer != actingonly[0]) {
peer_missing[bad_peer].add(soid, oi.version, eversion_t());
} else {
// We should only be scrubbing if the PG is clean.
@@ -3274,12 +3312,13 @@ void PG::scrub(ThreadPool::TPHandle &handle)
if (!scrubber.active) {
OSDMapRef curmap = osd->get_osdmap();
scrubber.is_chunky = true;
- for (unsigned i=1; i<acting.size(); i++) {
- ConnectionRef con = osd->get_con_osd_cluster(acting[i], get_osdmap()->get_epoch());
+ assert(backfill_targets.empty());
+ for (unsigned i=1; i<actingonly.size(); i++) {
+ ConnectionRef con = osd->get_con_osd_cluster(actingonly[i], get_osdmap()->get_epoch());
if (!con)
continue;
if (!con->has_feature(CEPH_FEATURE_CHUNKY_SCRUB)) {
- dout(20) << "OSD " << acting[i]
+ dout(20) << "OSD " << actingonly[i]
<< " does not support chunky scrubs, falling back to classic"
<< dendl;
scrubber.is_chunky = false;
@@ -3365,12 +3404,12 @@ void PG::classic_scrub(ThreadPool::TPHandle &handle)
* the primary has done a final scrub (which in turn can only happen if
* last_update_applied == info.last_update)
*/
- scrubber.waiting_on = acting.size();
- scrubber.waiting_on_whom.insert(acting.begin(), acting.end());
+ scrubber.waiting_on = actingonly.size();
+ scrubber.waiting_on_whom.insert(actingonly.begin(), actingonly.end());
// request maps from replicas
- for (unsigned i=1; i<acting.size(); i++) {
- _request_scrub_map_classic(acting[i], eversion_t());
+ for (unsigned i=1; i<actingonly.size(); i++) {
+ _request_scrub_map_classic(actingonly[i], eversion_t());
}
// Unlocks and relocks...
@@ -3606,10 +3645,10 @@ void PG::chunky_scrub(ThreadPool::TPHandle &handle)
++scrubber.waiting_on;
// request maps from replicas
- for (unsigned i=1; i<acting.size(); i++) {
- _request_scrub_map(acting[i], scrubber.subset_last_update,
+ for (unsigned i=1; i<actingonly.size(); i++) {
+ _request_scrub_map(actingonly[i], scrubber.subset_last_update,
scrubber.start, scrubber.end, scrubber.deep);
- scrubber.waiting_on_whom.insert(acting[i]);
+ scrubber.waiting_on_whom.insert(actingonly[i]);
++scrubber.waiting_on;
}
@@ -3936,14 +3975,14 @@ void PG::_compare_scrubmaps(const map<int,ScrubMap*> &maps,
++scrubber.shallow_errors;
else
++scrubber.deep_errors;
- errorstream << info.pgid << " osd." << acting[j->first]
+ errorstream << info.pgid << " osd." << actingonly[j->first]
<< ": soid " << *k << " " << ss.str() << std::endl;
}
} else {
cur_missing.insert(j->first);
++scrubber.shallow_errors;
errorstream << info.pgid
- << " osd." << acting[j->first]
+ << " osd." << actingonly[j->first]
<< " missing " << *k << std::endl;
}
}
@@ -3967,7 +4006,7 @@ void PG::scrub_compare_maps()
// construct authoritative scrub map for type specific scrubbing
ScrubMap authmap(scrubber.primary_scrubmap);
- if (acting.size() > 1) {
+ if (actingonly.size() > 1) {
dout(10) << "scrub comparing replica scrub maps" << dendl;
stringstream ss;
@@ -3976,13 +4015,13 @@ void PG::scrub_compare_maps()
map<hobject_t, int> authoritative;
map<int,ScrubMap *> maps;
- dout(2) << "scrub osd." << acting[0] << " has "
+ dout(2) << "scrub osd." << actingonly[0] << " has "
<< scrubber.primary_scrubmap.objects.size() << " items" << dendl;
maps[0] = &scrubber.primary_scrubmap;
- for (unsigned i=1; i<acting.size(); i++) {
- dout(2) << "scrub osd." << acting[i] << " has "
- << scrubber.received_maps[acting[i]].objects.size() << " items" << dendl;
- maps[i] = &scrubber.received_maps[acting[i]];
+ for (unsigned i=1; i<actingonly.size(); i++) {
+ dout(2) << "scrub osd." << actingonly[i] << " has "
+ << scrubber.received_maps[actingonly[i]].objects.size() << " items" << dendl;
+ maps[i] = &scrubber.received_maps[actingonly[i]];
}
_compare_scrubmaps(
@@ -4059,8 +4098,8 @@ void PG::scrub_process_inconsistent()
++j) {
repair_object(i->first,
&(i->second.first),
- acting[*j],
- acting[i->second.second]);
+ actingonly[*j],
+ actingonly[i->second.second]);
++scrubber.fixed;
}
}
@@ -4070,8 +4109,8 @@ void PG::scrub_process_inconsistent()
++j) {
repair_object(i->first,
&(i->second.first),
- acting[*j],
- acting[i->second.second]);
+ actingonly[*j],
+ actingonly[i->second.second]);
++scrubber.fixed;
}
}
@@ -4212,8 +4251,9 @@ void PG::share_pg_info()
dout(10) << "share_pg_info" << dendl;
// share new pg_info_t with replicas
- for (unsigned i=1; i<acting.size(); i++) {
- int peer = acting[i];
+ assert(actingbackfill.size() > 0);
+ for (unsigned i=1; i<actingbackfill.size(); i++) {
+ int peer = actingbackfill[i];
if (peer_info.count(i)) {
peer_info[i].last_epoch_started = info.last_epoch_started;
peer_info[i].history.merge(info.history);
@@ -4240,9 +4280,9 @@ void PG::share_pg_log()
dout(10) << __func__ << dendl;
assert(is_primary());
- vector<int>::const_iterator a = acting.begin();
- assert(a != acting.end());
- vector<int>::const_iterator end = acting.end();
+ vector<int>::const_iterator a = actingbackfill.begin();
+ assert(a != actingbackfill.end());
+ vector<int>::const_iterator end = actingbackfill.end();
while (++a != end) {
int peer(*a);
pg_missing_t& pmissing(peer_missing[peer]);
@@ -4272,8 +4312,8 @@ void PG::update_history_from_master(pg_history_t new_history)
void PG::fulfill_info(int from, const pg_query_t &query,
pair<int, pg_info_t> &notify_info)
{
- assert(!acting.empty());
- assert(from == acting[0]);
+ assert(!actingonly.empty());
+ assert(from == actingonly[0]);
assert(query.type == pg_query_t::INFO);
// info
@@ -4283,8 +4323,8 @@ void PG::fulfill_info(int from, const pg_query_t &query,
void PG::fulfill_log(int from, const pg_query_t &query, epoch_t query_epoch)
{
- assert(!acting.empty());
- assert(from == acting[0]);
+ assert(!actingonly.empty());
+ assert(from == actingonly[0]);
assert(query.type != pg_query_t::INFO);
MOSDPGLog *mlog = new MOSDPGLog(get_osdmap()->get_epoch(),
@@ -4364,7 +4404,7 @@ bool PG::may_need_replay(const OSDMapRef osdmap) const
any_survived_interval = true;
}
else if (pinfo->up_from <= interval.first &&
- (std::find(acting.begin(), acting.end(), o) != acting.end() ||
+ (std::find(actingonly.begin(), actingonly.end(), o) != actingonly.end() ||
std::find(up.begin(), up.end(), o) != up.end())) {
dout(10) << "may_need_replay osd." << o
<< " up_from " << pinfo->up_from << " and is in acting|up,"
@@ -4407,7 +4447,7 @@ bool PG::is_split(OSDMapRef lastmap, OSDMapRef nextmap)
bool PG::acting_up_affected(const vector<int>& newup, const vector<int>& newacting)
{
- if (acting != newacting || up != newup) {
+ if (actingonly != newacting || up != newup) {
dout(20) << "acting_up_affected newup " << newup << " newacting " << newacting << dendl;
return true;
} else {
@@ -4472,25 +4512,27 @@ void PG::start_peering_interval(const OSDMapRef lastmap,
vector<int> oldacting, oldup;
int oldrole = get_role();
int oldprimary = get_primary();
- acting.swap(oldacting);
+ actingonly.swap(oldacting);
up.swap(oldup);
up = newup;
- acting = newacting;
+ actingonly = newacting;
if (info.stats.up != up ||
- info.stats.acting != acting) {
+ info.stats.acting != actingonly) {
info.stats.up = up;
- info.stats.acting = acting;
+ info.stats.acting = actingonly;
info.stats.mapping_epoch = info.history.same_interval_since;
}
- if (up != acting)
+ //This will now be remapped during a backfill in cases
+ //that it would have been before.
+ if (up != actingonly)
state_set(PG_STATE_REMAPPED);
else
state_clear(PG_STATE_REMAPPED);
- int role = osdmap->calc_pg_role(osd->whoami, acting, acting.size());
+ int role = osdmap->calc_pg_role(osd->whoami, actingonly, actingonly.size());
set_role(role);
// did acting, up, primary|acker change?
@@ -4520,7 +4562,7 @@ void PG::start_peering_interval(const OSDMapRef lastmap,
}
}
- if (oldacting != acting || oldup != up || is_split(lastmap, osdmap)) {
+ if (oldacting != actingonly || oldup != up || is_split(lastmap, osdmap)) {
info.history.same_interval_since = osdmap->get_epoch();
}
if (oldup != up) {
@@ -4531,7 +4573,7 @@ void PG::start_peering_interval(const OSDMapRef lastmap,
}
dout(10) << " up " << oldup << " -> " << up
- << ", acting " << oldacting << " -> " << acting
+ << ", acting " << oldacting << " -> " << actingonly
<< ", role " << oldrole << " -> " << role << dendl;
// deactivate.
@@ -4542,6 +4584,7 @@ void PG::start_peering_interval(const OSDMapRef lastmap,
peer_missing.clear();
peer_purged.clear();
+ actingbackfill.clear();
// reset primary state?
if (oldrole == 0 || get_role() == 0)
@@ -4584,7 +4627,7 @@ void PG::start_peering_interval(const OSDMapRef lastmap,
// we need to announce
send_notify = true;
- dout(10) << *this << " " << oldacting << " -> " << acting
+ dout(10) << *this << " " << oldacting << " -> " << actingonly
<< ", acting primary "
<< oldprimary << " -> " << get_primary()
<< dendl;
@@ -4594,7 +4637,7 @@ void PG::start_peering_interval(const OSDMapRef lastmap,
// i am (still) primary. but my replica set changed.
state_clear(PG_STATE_CLEAN);
- dout(10) << oldacting << " -> " << acting
+ dout(10) << oldacting << " -> " << actingonly
<< ", replicas changed" << dendl;
}
}
@@ -4603,9 +4646,9 @@ void PG::start_peering_interval(const OSDMapRef lastmap,
osd->remove_want_pg_temp(info.pgid);
cancel_recovery();
- if (acting.empty() && !up.empty() && up[0] == osd->whoami) {
+ if (actingonly.empty() && !up.empty() && up[0] == osd->whoami) {
dout(10) << " acting empty, but i am up[0], clearing pg_temp" << dendl;
- osd->queue_want_pg_temp(info.pgid, acting);
+ osd->queue_want_pg_temp(info.pgid, actingonly);
}
}
@@ -4658,8 +4701,8 @@ ostream& operator<<(ostream& out, const PG& pg)
{
out << "pg[" << pg.info
<< " " << pg.up;
- if (pg.acting != pg.up)
- out << "/" << pg.acting;
+ if (pg.actingonly != pg.up)
+ out << "/" << pg.actingonly;
out << " r=" << pg.get_role();
out << " lpr=" << pg.get_last_peering_reset();
@@ -5443,7 +5486,7 @@ PG::RecoveryState::WaitRemoteBackfillReserved::WaitRemoteBackfillReserved(my_con
PG *pg = context< RecoveryMachine >().pg;
pg->state_set(PG_STATE_BACKFILL_WAIT);
ConnectionRef con = pg->osd->get_con_osd_cluster(
- pg->backfill_target, pg->get_osdmap()->get_epoch());
+ pg->get_backfill_target(), pg->get_osdmap()->get_epoch());
if (con) {
if (con->has_feature(CEPH_FEATURE_BACKFILL_RESERVATION)) {
unsigned priority = pg->is_degraded() ? OSDService::BACKFILL_HIGH
@@ -5565,7 +5608,7 @@ PG::RecoveryState::RepWaitRecoveryReserved::react(const RemoteRecoveryReserved &
{
PG *pg = context< RecoveryMachine >().pg;
pg->osd->send_message_osd_cluster(
- pg->acting[0],
+ pg->actingonly[0],
new MRecoveryReserve(
MRecoveryReserve::GRANT,
pg->info.pgid,
@@ -5625,7 +5668,7 @@ PG::RecoveryState::RepWaitBackfillReserved::react(const RemoteBackfillReserved &
{
PG *pg = context< RecoveryMachine >().pg;
pg->osd->send_message_osd_cluster(
- pg->acting[0],
+ pg->actingonly[0],
new MBackfillReserve(
MBackfillReserve::GRANT,
pg->info.pgid,
@@ -5828,11 +5871,12 @@ PG::RecoveryState::Recovered::Recovered(my_context ctx)
// if we finished backfill, all acting are active; recheck if
// DEGRADED is appropriate.
- if (pg->get_osdmap()->get_pg_size(pg->info.pgid) <= pg->acting.size())
+ assert(pg->actingbackfill.size() > 0);
+ if (pg->get_osdmap()->get_pg_size(pg->info.pgid) <= pg->actingbackfill.size())
pg->state_clear(PG_STATE_DEGRADED);
// adjust acting set? (e.g. because backfill completed...)
- if (pg->acting != pg->up && !pg->choose_acting(newest_update_osd))
+ if (pg->actingonly != pg->up && !pg->choose_acting(newest_update_osd))
assert(pg->want_acting.size());
assert(!pg->needs_recovery());
@@ -5881,8 +5925,8 @@ void PG::RecoveryState::Clean::exit()
PG::RecoveryState::Active::Active(my_context ctx)
: my_base(ctx),
NamedState(context< RecoveryMachine >().pg->cct, "Started/Primary/Active"),
- sorted_acting_set(context< RecoveryMachine >().pg->acting.begin(),
- context< RecoveryMachine >().pg->acting.end()),
+ sorted_acting_set(context< RecoveryMachine >().pg->actingonly.begin(),
+ context< RecoveryMachine >().pg->actingonly.end()),
all_replicas_activated(false)
{
context< RecoveryMachine >().log_enter(state_name);
@@ -5916,8 +5960,8 @@ boost::statechart::result PG::RecoveryState::Active::react(const AdvMap& advmap)
for (vector<int>::iterator p = pg->want_acting.begin();
p != pg->want_acting.end(); ++p) {
if (!advmap.osdmap->is_up(*p)) {
- assert((std::find(pg->acting.begin(), pg->acting.end(), *p) !=
- pg->acting.end()) ||
+ assert((std::find(pg->actingonly.begin(), pg->actingonly.end(), *p) !=
+ pg->actingonly.end()) ||
(std::find(pg->up.begin(), pg->up.end(), *p) !=
pg->up.end()));
}
@@ -5927,10 +5971,7 @@ boost::statechart::result PG::RecoveryState::Active::react(const AdvMap& advmap)
* this does not matter) */
if (advmap.lastmap->get_pg_size(pg->info.pgid) !=
pg->get_osdmap()->get_pg_size(pg->info.pgid)) {
- unsigned active = pg->acting.size();
- if (pg->backfill_target != -1)
- --active;
- if (pg->get_osdmap()->get_pg_size(pg->info.pgid) <= active)
+ if (pg->get_osdmap()->get_pg_size(pg->info.pgid) <= pg->actingonly.size())
pg->state_clear(PG_STATE_DEGRADED);
else
pg->state_set(PG_STATE_DEGRADED);
@@ -6017,10 +6058,11 @@ boost::statechart::result PG::RecoveryState::Active::react(const MInfoRec& infoe
assert(pg->is_active());
assert(pg->is_primary());
+ assert(pg->actingbackfill.size() > 0);
// don't update history (yet) if we are active and primary; the replica
// may be telling us they have activated (and committed) but we can't
// share that until _everyone_ does the same.
- if (pg->is_acting(infoevt.from)) {
+ if (pg->is_actingbackfill(infoevt.from)) {
assert(pg->info.history.last_epoch_started <
pg->info.history.same_interval_since);
assert(infoevt.info.history.last_epoch_started >=
@@ -6030,7 +6072,7 @@ boost::statechart::result PG::RecoveryState::Active::react(const MInfoRec& infoe
pg->peer_activated.insert(infoevt.from);
}
- if (pg->peer_activated.size() == pg->acting.size()) {
+ if (pg->peer_activated.size() == pg->actingbackfill.size()) {
pg->all_activated_and_committed();
}
return discard_event();
@@ -6535,7 +6577,9 @@ PG::RecoveryState::GetLog::GetLog(my_context ctx)
// how much log to request?
eversion_t request_log_from = pg->info.last_update;
- for (vector<int>::iterator p = pg->acting.begin() + 1; p != pg->acting.end(); ++p) {
+ assert(pg->actingbackfill.size() > 0);
+ for (vector<int>::iterator p = pg->actingbackfill.begin() + 1;
+ p != pg->actingbackfill.end(); ++p) {
pg_info_t& ri = pg->peer_info[*p];
if (ri.last_update >= best.log_tail && ri.last_update < request_log_from)
request_log_from = ri.last_update;
@@ -6719,8 +6763,9 @@ PG::RecoveryState::GetMissing::GetMissing(my_context ctx)
context< RecoveryMachine >().log_enter(state_name);
PG *pg = context< RecoveryMachine >().pg;
- for (vector<int>::iterator i = pg->acting.begin() + 1;
- i != pg->acting.end();
+ assert(pg->actingbackfill.size() > 0);
+ for (vector<int>::iterator i = pg->actingbackfill.begin() + 1;
+ i != pg->actingbackfill.end();
++i) {
const pg_info_t& pi = pg->peer_info[*i];
diff --git a/src/osd/PG.h b/src/osd/PG.h
index dc11638fd4b..9b13f834080 100644
--- a/src/osd/PG.h
+++ b/src/osd/PG.h
@@ -333,7 +333,7 @@ public:
// primary state
public:
- vector<int> up, acting, want_acting;
+ vector<int> up, actingonly, want_acting, actingbackfill;
map<int,eversion_t> peer_last_complete_ondisk;
eversion_t min_last_complete_ondisk; // up: min over last_complete_ondisk, peer_last_complete_ondisk
eversion_t pg_trim_to;
@@ -504,14 +504,18 @@ protected:
BackfillInterval backfill_info;
BackfillInterval peer_backfill_info;
- int backfill_target;
+ vector<int> backfill_targets;
bool backfill_reserved;
bool backfill_reserving;
friend class OSD;
public:
+ //Compatibility with single backfill target code
int get_backfill_target() const {
+ int backfill_target = -1;
+ if (backfill_targets.size() > 0)
+ backfill_target = backfill_targets[0];
return backfill_target;
}
@@ -556,16 +560,24 @@ public:
void clear_primary_state();
public:
+#if 0
+ //This function not used
bool is_acting(int osd) const {
- for (unsigned i=0; i<acting.size(); i++)
- if (acting[i] == osd) return true;
+ for (unsigned i=0; i<actingonly.size(); i++)
+ if (actingonly[i] == osd) return true;
return false;
}
+#endif
bool is_up(int osd) const {
for (unsigned i=0; i<up.size(); i++)
if (up[i] == osd) return true;
return false;
}
+ bool is_actingbackfill(int osd) const {
+ for (unsigned i=0; i<actingbackfill.size(); i++)
+ if (actingbackfill[i] == osd) return true;
+ return false;
+ }
bool needs_recovery() const;
bool needs_backfill() const;
@@ -587,10 +599,11 @@ public:
bool calc_min_last_complete_ondisk() {
eversion_t min = last_complete_ondisk;
- for (unsigned i=1; i<acting.size(); i++) {
- if (peer_last_complete_ondisk.count(acting[i]) == 0)
+ assert(actingbackfill.size() > 0);
+ for (unsigned i=1; i<actingbackfill.size(); i++) {
+ if (peer_last_complete_ondisk.count(actingbackfill[i]) == 0)
return false; // we don't have complete info
- eversion_t a = peer_last_complete_ondisk[acting[i]];
+ eversion_t a = peer_last_complete_ondisk[actingbackfill[i]];
if (a < min)
min = a;
}
@@ -622,7 +635,7 @@ public:
void trim_write_ahead();
map<int, pg_info_t>::const_iterator find_best_info(const map<int, pg_info_t> &infos) const;
- bool calc_acting(int& newest_update_osd, vector<int>& want) const;
+ bool calc_acting(int& newest_update_osd, vector<int>& want, vector<int>& backfill) const;
bool choose_acting(int& newest_update_osd);
void build_might_have_unfound();
void replay_queued_ops();
@@ -1637,9 +1650,13 @@ public:
public:
pg_t get_pgid() const { return info.pgid; }
+#if 0
+ //Not used
int get_nrep() const { return acting.size(); }
+#endif
- int get_primary() { return acting.empty() ? -1:acting[0]; }
+ // Rename actingonly -> acting
+ int get_primary() { return actingonly.empty() ? -1:actingonly[0]; }
int get_role() const { return role; }
void set_role(int r) { role = r; }
diff --git a/src/osd/PGBackend.h b/src/osd/PGBackend.h
index 408c589a08a..856a541271c 100644
--- a/src/osd/PGBackend.h
+++ b/src/osd/PGBackend.h
@@ -96,6 +96,7 @@
virtual void queue_transaction(ObjectStore::Transaction *t) = 0;
virtual epoch_t get_epoch() = 0;
virtual const vector<int> &get_acting() = 0;
+ virtual const vector<int> &get_actingbackfill() = 0;
virtual std::string gen_dbg_prefix() const = 0;
virtual const map<hobject_t, set<int> > &get_missing_loc() = 0;
diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc
index fd4ffb77485..68e080c797a 100644
--- a/src/osd/ReplicatedPG.cc
+++ b/src/osd/ReplicatedPG.cc
@@ -222,7 +222,7 @@ void ReplicatedPG::on_peer_recover(
publish_stats_to_osd();
// done!
peer_missing[peer].got(soid, recovery_info.version);
- if (peer == backfill_target && backfills_in_flight.count(soid))
+ if (peer == get_backfill_target() && backfills_in_flight.count(soid))
backfills_in_flight.erase(soid);
}
@@ -297,15 +297,16 @@ bool ReplicatedPG::is_degraded_object(const hobject_t& soid)
{
if (pg_log.get_missing().missing.count(soid))
return true;
- for (unsigned i = 1; i < acting.size(); i++) {
- int peer = acting[i];
+ assert(actingbackfill.size() > 0);
+ for (unsigned i = 1; i < actingbackfill.size(); i++) {
+ int peer = actingbackfill[i];
if (peer_missing.count(peer) &&
peer_missing[peer].missing.count(soid))
return true;
// Object is degraded if after last_backfill AND
// we are backfilling it
- if (peer == backfill_target &&
+ if (peer == get_backfill_target() &&
peer_info[peer].last_backfill <= soid &&
backfill_pos >= soid &&
backfills_in_flight.count(soid))
@@ -330,8 +331,9 @@ void ReplicatedPG::wait_for_degraded_object(const hobject_t& soid, OpRequestRef
<< ", recovering"
<< dendl;
eversion_t v;
- for (unsigned i = 1; i < acting.size(); i++) {
- int peer = acting[i];
+ assert(actingbackfill.size() > 0);
+ for (unsigned i = 1; i < actingbackfill.size(); i++) {
+ int peer = actingbackfill[i];
if (peer_missing.count(peer) &&
peer_missing[peer].missing.count(soid)) {
v = peer_missing[peer].missing[soid].need;
@@ -461,9 +463,21 @@ int ReplicatedPG::do_command(cmdmap_t cmdmap, ostream& ss,
f->dump_unsigned("osd", *p);
f->close_section();
f->open_array_section("acting");
- for (vector<int>::iterator p = acting.begin(); p != acting.end(); ++p)
+ for (vector<int>::iterator p = actingonly.begin(); p != actingonly.end(); ++p)
f->dump_unsigned("osd", *p);
f->close_section();
+ if (backfill_targets.size() > 0) {
+ f->open_array_section("backfill_targets");
+ for (vector<int>::iterator p = backfill_targets.begin(); p != backfill_targets.end(); ++p)
+ f->dump_unsigned("osd", *p);
+ f->close_section();
+ }
+ if (actingbackfill.size() > 0) {
+ f->open_array_section("actingbackfill");
+ for (vector<int>::iterator p = actingbackfill.begin(); p != actingbackfill.end(); ++p)
+ f->dump_unsigned("osd", *p);
+ f->close_section();
+ }
f->open_object_section("info");
info.dump(f.get());
f->close_section();
@@ -1005,6 +1019,7 @@ void ReplicatedPG::do_op(OpRequestRef op)
// opposite is not a problem; if the target is after the line, we
// don't apply on the backfill_target and it doesn't matter.)
pg_info_t *backfill_target_info = NULL;
+ int backfill_target = get_backfill_target();
bool before_backfill = false;
if (backfill_target >= 0) {
backfill_target_info = &peer_info[backfill_target];
@@ -1568,7 +1583,7 @@ void ReplicatedPG::do_scan(
case MOSDPGScan::OP_SCAN_DIGEST:
{
int from = m->get_source().num();
- assert(from == backfill_target);
+ assert(from == get_backfill_target());
BackfillInterval& bi = peer_backfill_info;
bi.begin = m->begin;
bi.end = m->end;
@@ -4284,6 +4299,7 @@ int ReplicatedPG::prepare_transaction(OpContext *ctx)
ctx->obc->ssc->snapset = ctx->new_snapset;
info.stats.stats.add(ctx->delta_stats, ctx->obc->obs.oi.category);
+ int backfill_target = get_backfill_target();
if (backfill_target >= 0) {
pg_info_t& pinfo = peer_info[backfill_target];
if (soid < pinfo.last_backfill)
@@ -4861,18 +4877,19 @@ void ReplicatedPG::issue_repop(RepGather *repop, utime_t now)
repop->v = ctx->at_version;
// add myself to gather set
- repop->waitfor_ack.insert(acting[0]);
- repop->waitfor_disk.insert(acting[0]);
+ repop->waitfor_ack.insert(actingonly[0]);
+ repop->waitfor_disk.insert(actingonly[0]);
int acks_wanted = CEPH_OSD_FLAG_ACK | CEPH_OSD_FLAG_ONDISK;
- if (ctx->op && acting.size() > 1) {
+ assert(actingbackfill.size() > 0);
+ if (ctx->op && actingbackfill.size() > 1) {
ostringstream ss;
- ss << "waiting for subops from " << vector<int>(acting.begin() + 1, acting.end());
+ ss << "waiting for subops from " << vector<int>(actingbackfill.begin() + 1, actingbackfill.end());
ctx->op->mark_sub_op_sent(ss.str());
}
- for (unsigned i=1; i<acting.size(); i++) {
- int peer = acting[i];
+ for (unsigned i=1; i<actingbackfill.size(); i++) {
+ int peer = actingbackfill[i];
pg_info_t &pinfo = peer_info[peer];
repop->waitfor_ack.insert(peer);
@@ -4889,6 +4906,7 @@ void ReplicatedPG::issue_repop(RepGather *repop, utime_t now)
assert(0 == "broken implementation, do not use");
}
+ int backfill_target = get_backfill_target();
// ship resulting transaction, log entries, and pg_stats
if (peer == backfill_target && soid >= backfill_pos &&
soid.pool == (int64_t)info.pgid.pool()) { // only skip normal (not temp pool=-1) objects
@@ -5532,7 +5550,7 @@ void ReplicatedPG::sub_op_modify(OpRequestRef op)
// we better not be missing this.
assert(!pg_log.get_missing().is_missing(soid));
- int ackerosd = acting[0];
+ int ackerosd = actingonly[0];
op->mark_started();
@@ -7038,10 +7056,11 @@ eversion_t ReplicatedPG::pick_newest_available(const hobject_t& oid)
v = pg_log.get_missing().missing.find(oid)->second.have;
dout(10) << "pick_newest_available " << oid << " " << v << " on osd." << osd->whoami << " (local)" << dendl;
- for (unsigned i=1; i<acting.size(); ++i) {
- int peer = acting[i];
+ assert(actingbackfill.size() > 0);
+ for (unsigned i=1; i<actingbackfill.size(); ++i) {
+ int peer = actingbackfill[i];
if (!peer_missing[peer].is_missing(oid)) {
- assert(peer == backfill_target);
+ assert(peer == get_backfill_target());
continue;
}
eversion_t h = peer_missing[peer].missing[oid].have;
@@ -7307,17 +7326,16 @@ void ReplicatedPG::on_shutdown()
cancel_recovery();
}
+//XXX: For now only care about a single backfill at a time
void ReplicatedPG::on_activate()
{
- for (unsigned i = 1; i<acting.size(); i++) {
- if (peer_info[acting[i]].last_backfill != hobject_t::get_max()) {
- assert(backfill_target == -1);
- backfill_target = acting[i];
- backfill_pos = peer_info[acting[i]].last_backfill;
- dout(10) << " chose backfill target osd." << backfill_target
- << " from " << backfill_pos << dendl;
- }
- }
+ int backfill_target = get_backfill_target();
+ if (backfill_target == -1)
+ return;
+ backfill_pos = peer_info[backfill_target].last_backfill;
+ assert(backfill_pos != hobject_t::get_max());
+ dout(10) << " chose backfill target osd." << backfill_target
+ << " from " << backfill_pos << dendl;
}
void ReplicatedPG::on_change(ObjectStore::Transaction *t)
@@ -7512,6 +7530,7 @@ int ReplicatedPG::start_recovery_ops(
}
bool deferred_backfill = false;
+ int backfill_target = get_backfill_target();
if (recovering.empty() &&
state_test(PG_STATE_BACKFILL) &&
backfill_target >= 0 && started < max &&
@@ -7766,6 +7785,7 @@ int ReplicatedPG::prep_object_replica_pushes(
const hobject_t& soid, eversion_t v,
PGBackend::RecoveryHandle *h)
{
+ assert(is_primary());
dout(10) << __func__ << ": on " << soid << dendl;
// NOTE: we know we will get a valid oloc off of disk here.
@@ -7773,8 +7793,9 @@ int ReplicatedPG::prep_object_replica_pushes(
if (!obc) {
pg_log.missing_add(soid, v, eversion_t());
bool uhoh = true;
- for (unsigned i=1; i<acting.size(); i++) {
- int peer = acting[i];
+ assert(actingbackfill.size() > 0);
+ for (unsigned i=1; i<actingbackfill.size(); i++) {
+ int peer = actingbackfill[i];
if (!peer_missing[peer].is_missing(soid, v)) {
missing_loc[soid].insert(peer);
missing_loc_sources.insert(peer);
@@ -7817,8 +7838,9 @@ int ReplicatedBackend::start_pushes(
{
int pushes = 0;
// who needs it?
- for (unsigned i=1; i<get_parent()->get_acting().size(); i++) {
- int peer = get_parent()->get_acting()[i];
+ assert(get_parent()->get_actingbackfill().size() > 0);
+ for (unsigned i=1; i<get_parent()->get_actingbackfill().size(); i++) {
+ int peer = get_parent()->get_actingbackfill()[i];
map<int, pg_missing_t>::const_iterator j =
get_parent()->get_peer_missing().find(peer);
assert(j != get_parent()->get_peer_missing().end());
@@ -7841,8 +7863,9 @@ int ReplicatedPG::recover_replicas(int max, ThreadPool::TPHandle &handle)
PGBackend::RecoveryHandle *h = pgbackend->open_recovery_op();
// this is FAR from an optimal recovery order. pretty lame, really.
- for (unsigned i=1; i<acting.size(); i++) {
- int peer = acting[i];
+ assert(actingbackfill.size() > 0);
+ for (unsigned i=1; i<actingbackfill.size(); i++) {
+ int peer = actingbackfill[i];
map<int, pg_missing_t>::const_iterator pm = peer_missing.find(peer);
assert(pm != peer_missing.end());
map<int, pg_info_t>::const_iterator pi = peer_info.find(peer);
@@ -7919,6 +7942,7 @@ int ReplicatedPG::recover_backfill(
ThreadPool::TPHandle &handle)
{
dout(10) << "recover_backfill (" << max << ")" << dendl;
+ int backfill_target = get_backfill_target();
assert(backfill_target >= 0);
pg_info_t& pinfo = peer_info[backfill_target];
@@ -8106,7 +8130,7 @@ void ReplicatedPG::prep_backfill_object_push(
dout(10) << "push_backfill_object " << oid << " v " << v << " to osd." << peer << dendl;
backfills_in_flight.insert(oid);
- map<int, pg_missing_t>::iterator bpm = peer_missing.find(backfill_target);
+ map<int, pg_missing_t>::iterator bpm = peer_missing.find(get_backfill_target());
assert(bpm != peer_missing.end());
bpm->second.add(oid, eversion_t(), eversion_t());
diff --git a/src/osd/ReplicatedPG.h b/src/osd/ReplicatedPG.h
index 00216170516..77211fef474 100644
--- a/src/osd/ReplicatedPG.h
+++ b/src/osd/ReplicatedPG.h
@@ -279,8 +279,12 @@ public:
epoch_t get_epoch() {
return get_osdmap()->get_epoch();
}
+ //Not used
const vector<int> &get_acting() {
- return acting;
+ return actingonly;
+ }
+ const vector<int> &get_actingbackfill() {
+ return actingbackfill;
}
std::string gen_dbg_prefix() const { return gen_prefix(); }