diff options
author | David Zafman <david.zafman@inktank.com> | 2013-10-11 15:53:49 -0700 |
---|---|---|
committer | David Zafman <david.zafman@inktank.com> | 2013-10-22 10:10:18 -0700 |
commit | 5296752193159b8f2b783741b13ac83070ea895a (patch) | |
tree | 6c3c306045cb01b22e432e3d453831c7c4e34a2c | |
parent | bed222280647ab8cd9a3a419814886a4fe604264 (diff) | |
download | ceph-5296752193159b8f2b783741b13ac83070ea895a.tar.gz |
Backfill peers should not be included in the acting set
Create actingbackfill in choose_acting()
Use first backfill target as previously
Add asserts to catch inappropriate use of actingbackfill
fixes: #5855
Signed-off-by: David Zafman <david.zafman@inktank.com>
-rw-r--r-- | src/osd/OSD.cc | 17 | ||||
-rw-r--r-- | src/osd/PG.cc | 307 | ||||
-rw-r--r-- | src/osd/PG.h | 35 | ||||
-rw-r--r-- | src/osd/PGBackend.h | 1 | ||||
-rw-r--r-- | src/osd/ReplicatedPG.cc | 92 | ||||
-rw-r--r-- | src/osd/ReplicatedPG.h | 6 |
6 files changed, 276 insertions, 182 deletions
diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 7695d10b8e7..81a33a72788 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -2036,8 +2036,8 @@ void OSD::load_pgs() pg->reg_next_scrub(); // generate state for PG's current mapping - pg->get_osdmap()->pg_to_up_acting_osds(pgid, pg->up, pg->acting); - int role = pg->get_osdmap()->calc_pg_role(whoami, pg->acting); + pg->get_osdmap()->pg_to_up_acting_osds(pgid, pg->up, pg->actingonly); + int role = pg->get_osdmap()->calc_pg_role(whoami, pg->actingonly); pg->set_role(role); PG::RecoveryCtx rctx(0, 0, 0, 0, 0, 0); @@ -2280,7 +2280,7 @@ void OSD::handle_pg_peering_evt( true, old_pg_state->role, old_pg_state->up, - old_pg_state->acting, + old_pg_state->actingonly, old_pg_state->info.history, old_pg_state->past_intervals, *rctx.transaction); @@ -2309,7 +2309,7 @@ void OSD::handle_pg_peering_evt( true, old_pg_state->role, old_pg_state->up, - old_pg_state->acting, + old_pg_state->actingonly, old_pg_state->info.history, old_pg_state->past_intervals, *rctx.transaction @@ -6016,8 +6016,11 @@ void OSD::dispatch_context_transaction(PG::RecoveryCtx &ctx, PG *pg) bool OSD::compat_must_dispatch_immediately(PG *pg) { assert(pg->is_locked()); - for (vector<int>::iterator i = pg->acting.begin(); - i != pg->acting.end(); + vector<int> *tmpacting = &pg->actingonly; + if (pg->actingbackfill.size() > 0) + tmpacting = &pg->actingbackfill; + for (vector<int>::iterator i = tmpacting->begin(); + i != tmpacting->end(); ++i) { if (*i == whoami) continue; @@ -6878,7 +6881,7 @@ void OSDService::handle_misdirected_op(PG *pg, OpRequestRef op) clog.warn() << m->get_source_inst() << " misdirected " << m->get_reqid() << " pg " << m->get_pg() << " to osd." 
<< whoami - << " not " << pg->acting + << " not " << pg->actingonly << " in e" << m->get_map_epoch() << "/" << osdmap->get_epoch() << "\n"; reply_op_error(op, -ENXIO); } diff --git a/src/osd/PG.cc b/src/osd/PG.cc index 8ba5b6cc2ff..e745a3f5f01 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -170,7 +170,6 @@ PG::PG(OSDService *o, OSDMapRef curmap, need_flush(false), last_peering_reset(0), heartbeat_peer_lock("PG::heartbeat_peer_lock"), - backfill_target(-1), backfill_reserved(0), backfill_reserving(0), flushed(false), @@ -288,7 +287,7 @@ bool PG::proc_replica_info(int from, const pg_info_t &oinfo) reg_next_scrub(); // stray? - if (!is_acting(from)) { + if (!is_actingbackfill(from)) { dout(10) << " osd." << from << " has stray content: " << oinfo << dendl; stray_set.insert(from); if (is_clean()) { @@ -484,8 +483,9 @@ bool PG::needs_recovery() const ret = true; } - vector<int>::const_iterator end = acting.end(); - vector<int>::const_iterator a = acting.begin(); + assert(actingbackfill.size() > 0); + vector<int>::const_iterator end = actingbackfill.end(); + vector<int>::const_iterator a = actingbackfill.begin(); assert(a != end); ++a; for (; a != end; ++a) { @@ -513,10 +513,10 @@ bool PG::needs_backfill() const bool ret = false; - vector<int>::const_iterator end = acting.end(); - vector<int>::const_iterator a = acting.begin(); - assert(a != end); - ++a; + //We can assume that only possible osds that need backfill + //are on the backfill_targets vector. 
+ vector<int>::const_iterator end = backfill_targets.end(); + vector<int>::const_iterator a = backfill_targets.begin(); for (; a != end; ++a) { int peer = *a; map<int,pg_info_t>::const_iterator pi = peer_info.find(peer); @@ -699,7 +699,7 @@ void PG::build_prior(std::auto_ptr<PriorSet> &prior_set) prior_set.reset(new PriorSet(*get_osdmap(), past_intervals, up, - acting, + actingonly, info, this)); PriorSet &prior(*prior_set.get()); @@ -827,7 +827,8 @@ map<int, pg_info_t>::const_iterator PG::find_best_info(const map<int, pg_info_t> } } // prefer current primary (usually the caller), all things being equal - if (p->first == acting[0]) { + //rename actingonly -> acting + if (p->first == actingonly[0]) { dout(10) << "calc_acting prefer osd." << p->first << " because it is current primary" << dendl; best = p; @@ -844,7 +845,7 @@ map<int, pg_info_t>::const_iterator PG::find_best_info(const map<int, pg_info_t> * incomplete, or another osd has a longer tail that allows us to * bring other up nodes up to date. */ -bool PG::calc_acting(int& newest_update_osd_id, vector<int>& want) const +bool PG::calc_acting(int& newest_update_osd_id, vector<int>& want, vector<int>& backfill) const { map<int, pg_info_t> all_info(peer_info.begin(), peer_info.end()); all_info[osd->whoami] = info; @@ -856,7 +857,8 @@ bool PG::calc_acting(int& newest_update_osd_id, vector<int>& want) const map<int, pg_info_t>::const_iterator newest_update_osd = find_best_info(all_info); if (newest_update_osd == all_info.end()) { - if (up != acting) { + //Is actingbackfill even set in this case? + if (up != actingonly) { dout(10) << "calc_acting no suitable info found (incomplete backfills?), reverting to up" << dendl; want = up; return true; @@ -908,7 +910,6 @@ bool PG::calc_acting(int& newest_update_osd_id, vector<int>& want) const << " with " << primary->second << dendl; want.push_back(primary->first); unsigned usable = 1; - unsigned backfill = 0; // select replicas that have log contiguity with primary. 
// prefer up, then acting, then any peer_info osds @@ -919,13 +920,8 @@ bool PG::calc_acting(int& newest_update_osd_id, vector<int>& want) const continue; const pg_info_t &cur_info = all_info.find(*i)->second; if (cur_info.is_incomplete() || cur_info.last_update < primary->second.log_tail) { - if (backfill < 1) { - dout(10) << " osd." << *i << " (up) accepted (backfill) " << cur_info << dendl; - want.push_back(*i); - backfill++; - } else { - dout(10) << " osd." << *i << " (up) rejected" << cur_info << dendl; - } + dout(10) << " osd." << *i << " (up) backfill " << cur_info << dendl; + backfill.push_back(*i); } else { want.push_back(*i); usable++; @@ -933,8 +929,10 @@ bool PG::calc_acting(int& newest_update_osd_id, vector<int>& want) const } } - for (vector<int>::const_iterator i = acting.begin(); - i != acting.end(); + //XXX: This no longer gets backfill OSDs, so I assume it is + //covered by the up OSDs. + for (vector<int>::const_iterator i = actingonly.begin(); + i != actingonly.end(); ++i) { if (usable >= get_osdmap()->get_pg_size(info.pgid)) break; @@ -956,6 +954,10 @@ bool PG::calc_acting(int& newest_update_osd_id, vector<int>& want) const } } + //XXX: Should we go through backfill_targets here, but that hasn't been + //propogated through the mons. 
+ //Ignore up and acting from above + for (map<int,pg_info_t>::const_iterator i = all_info.begin(); i != all_info.end(); ++i) { @@ -968,9 +970,10 @@ bool PG::calc_acting(int& newest_update_osd_id, vector<int>& want) const vector<int>::const_iterator up_it = find(up.begin(), up.end(), i->first); if (up_it != up.end()) continue; - vector<int>::const_iterator acting_it = find(acting.begin(), acting.end(), i->first); - if (acting_it != acting.end()) + vector<int>::const_iterator acting_it = find(actingonly.begin(), actingonly.end(), i->first); + if (acting_it != actingonly.end()) continue; + //XXX: Ignore backfill_targets items if (i->second.is_incomplete() || i->second.last_update < primary->second.log_tail) { dout(10) << " osd." << i->first << " (stray) REJECTED " << i->second << dendl; @@ -992,33 +995,53 @@ bool PG::calc_acting(int& newest_update_osd_id, vector<int>& want) const */ bool PG::choose_acting(int& newest_update_osd) { - vector<int> want; + vector<int> want, backfill; - if (!calc_acting(newest_update_osd, want)) { + if (!calc_acting(newest_update_osd, want, backfill)) { dout(10) << "choose_acting failed" << dendl; assert(want_acting.empty()); return false; } - if (want.size() < pool.info.min_size) { + //For now we only backfill 1 at a time as before + if (!backfill.empty()) + backfill.resize(1); + + //This might cause a problem if min_size is large + //and we need to backfill more than 1 osd. Older + //code would only include 1 backfill osd and now we + //have the resize above. + if (want.size() + backfill.size() < pool.info.min_size) { want_acting.clear(); return false; } - if (want != acting) { - dout(10) << "choose_acting want " << want << " != acting " << acting + if (want != actingonly) { + dout(10) << "choose_acting want " << want << " != acting " << actingonly << ", requesting pg_temp change" << dendl; want_acting = want; + if (want == up) { + //There can't be any pending backfill if + //want is the same as crush map up OSDs. 
+ assert(backfill.empty()); vector<int> empty; osd->queue_want_pg_temp(info.pgid, empty); } else osd->queue_want_pg_temp(info.pgid, want); return false; - } else { - want_acting.clear(); } - dout(10) << "choose_acting want " << want << " (== acting)" << dendl; + want_acting.clear(); + //We can only get here when new interval has arrived and + //we've accepted the acting set. Now we can create + //actingbackfill and backfill_targets vectors. + assert(actingbackfill.size() == 0); + assert(backfill_targets.size() == 0); + actingbackfill = actingonly; + actingbackfill.insert(actingbackfill.end(), backfill.begin(), backfill.end()); + backfill_targets = backfill; + dout(10) << "choose_acting want " << want << " (== acting) backfill_targets " + << backfill << dendl; return true; } @@ -1182,8 +1205,9 @@ void PG::activate(ObjectStore::Transaction& t, // count replicas that are not backfilling unsigned active = 1; - for (unsigned i=1; i<acting.size(); i++) { - int peer = acting[i]; + assert(actingbackfill.size() > 0); + for (unsigned i=1; i<actingbackfill.size(); i++) { + int peer = actingbackfill[i]; assert(peer_info.count(peer)); pg_info_t& pi = peer_info[peer]; @@ -1273,8 +1297,10 @@ void PG::activate(ObjectStore::Transaction& t, } } + assert(active == actingonly.size()); + // degraded? - if (get_osdmap()->get_pg_size(info.pgid) > active) + if (get_osdmap()->get_pg_size(info.pgid) > actingonly.size()) state_set(PG_STATE_DEGRADED); // all clean? 
@@ -1437,7 +1463,8 @@ void PG::_activate_committed(epoch_t e) dout(10) << "_activate_committed " << e << " peer_activated now " << peer_activated << " last_epoch_started " << info.history.last_epoch_started << " same_interval_since " << info.history.same_interval_since << dendl; - if (peer_activated.size() == acting.size()) + assert(actingbackfill.size() > 0); + if (peer_activated.size() == actingbackfill.size()) all_activated_and_committed(); } else { dout(10) << "_activate_committed " << e << " telling primary" << dendl; @@ -1447,7 +1474,7 @@ void PG::_activate_committed(epoch_t e) info); i.info.history.last_epoch_started = e; m->pg_list.push_back(make_pair(i, pg_interval_map_t())); - osd->send_message_osd_cluster(acting[0], m, get_osdmap()->get_epoch()); + osd->send_message_osd_cluster(actingonly[0], m, get_osdmap()->get_epoch()); } if (dirty_info) { @@ -1469,7 +1496,8 @@ void PG::all_activated_and_committed() { dout(10) << "all_activated_and_committed" << dendl; assert(is_primary()); - assert(peer_activated.size() == acting.size()); + assert(peer_activated.size() == actingbackfill.size()); + assert(actingbackfill.size() > 0); // info.last_epoch_started is set during activate() info.history.last_epoch_started = info.last_epoch_started; @@ -1525,8 +1553,8 @@ void PG::mark_clean() { // only mark CLEAN if we have the desired number of replicas AND we // are not remapped. - if (acting.size() == get_osdmap()->get_pg_size(info.pgid) && - up == acting) + if (actingonly.size() == get_osdmap()->get_pg_size(info.pgid) && + up == actingonly) state_set(PG_STATE_CLEAN); // NOTE: this is actually a bit premature: we haven't purged the @@ -1704,8 +1732,10 @@ void PG::split_into(pg_t child_pgid, PG *child, unsigned split_bits) child->snap_trimq = snap_trimq; - get_osdmap()->pg_to_up_acting_osds(child->info.pgid, child->up, child->acting); - child->role = get_osdmap()->calc_pg_role(osd->whoami, child->acting); + //What to do during split? 
+ //XXX: Can there be backfill going on or not? + get_osdmap()->pg_to_up_acting_osds(child->info.pgid, child->up, child->actingonly); + child->role = get_osdmap()->calc_pg_role(osd->whoami, child->actingonly); if (get_primary() != child->get_primary()) child->info.history.same_primary_since = get_osdmap()->get_epoch(); @@ -1736,7 +1766,7 @@ void PG::clear_recovery_state() finish_recovery_op(soid, true); } - backfill_target = -1; + backfill_targets.clear(); backfill_info.clear(); peer_backfill_info.clear(); waiting_on_backfill = false; @@ -1802,11 +1832,12 @@ void PG::clear_probe_targets() void PG::update_heartbeat_peers() { assert(is_locked()); + vector <int> hbpeers; set<int> new_peers; if (role == 0) { - for (unsigned i=0; i<acting.size(); i++) - new_peers.insert(acting[i]); + for (unsigned i=0; i<actingonly.size(); i++) + new_peers.insert(actingonly[i]); for (unsigned i=0; i<up.size(); i++) new_peers.insert(up[i]); for (map<int,pg_info_t>::iterator p = peer_info.begin(); p != peer_info.end(); ++p) @@ -1875,7 +1906,7 @@ void PG::publish_stats_to_osd() pg_stats_publish.stats.add(unstable_stats); // calc copies, degraded - unsigned target = MAX(get_osdmap()->get_pg_size(info.pgid), acting.size()); + unsigned target = MAX(get_osdmap()->get_pg_size(info.pgid), actingbackfill.size()); pg_stats_publish.stats.calc_copies(target); pg_stats_publish.stats.sum.num_objects_degraded = 0; if ((is_degraded() || !is_clean()) && is_active()) { @@ -1885,23 +1916,24 @@ void PG::publish_stats_to_osd() uint64_t degraded = 0; - // if the acting set is smaller than we want, add in those missing replicas - if (acting.size() < target) - degraded += (target - acting.size()) * num_objects; + // if the actingbackfill set is smaller than we want, add in those missing replicas + if (actingbackfill.size() < target) + degraded += (target - actingbackfill.size()) * num_objects; // missing on primary pg_stats_publish.stats.sum.num_objects_missing_on_primary = pg_log.get_missing().num_missing(); 
degraded += pg_log.get_missing().num_missing(); - for (unsigned i=1; i<acting.size(); i++) { - assert(peer_missing.count(acting[i])); + assert(actingbackfill.size() > 0); + for (unsigned i=1; i<actingbackfill.size(); i++) { + assert(peer_missing.count(actingbackfill[i])); // in missing set - degraded += peer_missing[acting[i]].num_missing(); + degraded += peer_missing[actingbackfill[i]].num_missing(); // not yet backfilled - degraded += num_objects - peer_info[acting[i]].stats.stats.sum.num_objects; + degraded += num_objects - peer_info[actingbackfill[i]].stats.stats.sum.num_objects; } pg_stats_publish.stats.sum.num_objects_degraded = degraded; pg_stats_publish.stats.sum.num_objects_unfound = get_num_unfound(); @@ -1955,14 +1987,14 @@ void PG::init(int role, vector<int>& newup, vector<int>& newacting, << dendl; set_role(role); - acting = newacting; + actingonly = newacting; up = newup; info.history = history; past_intervals.swap(pi); info.stats.up = up; - info.stats.acting = acting; + info.stats.acting = actingonly; info.stats.mapping_epoch = info.history.same_interval_since; if (backfill) { @@ -2213,11 +2245,13 @@ void PG::write_if_dirty(ObjectStore::Transaction& t) void PG::trim_peers() { + assert(is_primary()); calc_trim_to(); dout(10) << "trim_peers " << pg_trim_to << dendl; if (pg_trim_to != eversion_t()) { - for (unsigned i=1; i<acting.size(); i++) - osd->send_message_osd_cluster(acting[i], + assert(actingbackfill.size() > 0); + for (unsigned i=1; i<actingbackfill.size(); i++) + osd->send_message_osd_cluster(actingbackfill[i], new MOSDPGTrim(get_osdmap()->get_epoch(), info.pgid, pg_trim_to), get_osdmap()->get_epoch()); @@ -2590,7 +2624,7 @@ bool PG::sched_scrub() clear_scrub_reserved(); scrub_unreserve_replicas(); ret = false; - } else if (scrubber.reserved_peers.size() == acting.size()) { + } else if (scrubber.reserved_peers.size() == actingonly.size()) { dout(20) << "sched_scrub: success, reserved self and replicas" << dendl; if (time_for_deep) { dout(10) 
<< "sched_scrub: scrub will be deep" << dendl; @@ -2888,7 +2922,7 @@ void PG::sub_op_scrub_stop(OpRequestRef op) void PG::reject_reservation() { osd->send_message_osd_cluster( - acting[0], + actingonly[0], new MBackfillReserve( MBackfillReserve::REJECT, info.pgid, @@ -2920,8 +2954,10 @@ void PG::clear_scrub_reserved() void PG::scrub_reserve_replicas() { - for (unsigned i=1; i<acting.size(); i++) { - dout(10) << "scrub requesting reserve from osd." << acting[i] << dendl; + //XXX: Don't see how we don't scrub during backfill + assert(backfill_targets.empty()); + for (unsigned i=1; i<actingonly.size(); i++) { + dout(10) << "scrub requesting reserve from osd." << actingonly[i] << dendl; vector<OSDOp> scrub(1); scrub[0].op.op = CEPH_OSD_OP_SCRUB_RESERVE; hobject_t poid; @@ -2930,14 +2966,16 @@ void PG::scrub_reserve_replicas() MOSDSubOp *subop = new MOSDSubOp(reqid, info.pgid, poid, false, 0, get_osdmap()->get_epoch(), osd->get_tid(), v); subop->ops = scrub; - osd->send_message_osd_cluster(acting[i], subop, get_osdmap()->get_epoch()); + osd->send_message_osd_cluster(actingonly[i], subop, get_osdmap()->get_epoch()); } } void PG::scrub_unreserve_replicas() { - for (unsigned i=1; i<acting.size(); i++) { - dout(10) << "scrub requesting unreserve from osd." << acting[i] << dendl; + //XXX: Don't see how we don't scrub during backfill + assert(backfill_targets.empty()); + for (unsigned i=1; i<actingonly.size(); i++) { + dout(10) << "scrub requesting unreserve from osd." 
<< actingonly[i] << dendl; vector<OSDOp> scrub(1); scrub[0].op.op = CEPH_OSD_OP_SCRUB_UNRESERVE; hobject_t poid; @@ -2946,7 +2984,7 @@ void PG::scrub_unreserve_replicas() MOSDSubOp *subop = new MOSDSubOp(reqid, info.pgid, poid, false, 0, get_osdmap()->get_epoch(), osd->get_tid(), v); subop->ops = scrub; - osd->send_message_osd_cluster(acting[i], subop, get_osdmap()->get_epoch()); + osd->send_message_osd_cluster(actingonly[i], subop, get_osdmap()->get_epoch()); } } @@ -3135,7 +3173,7 @@ void PG::repair_object(const hobject_t& soid, ScrubMap::object *po, int bad_peer bufferlist bv; bv.push_back(po->attrs[OI_ATTR]); object_info_t oi(bv); - if (bad_peer != acting[0]) { + if (bad_peer != actingonly[0]) { peer_missing[bad_peer].add(soid, oi.version, eversion_t()); } else { // We should only be scrubbing if the PG is clean. @@ -3274,12 +3312,13 @@ void PG::scrub(ThreadPool::TPHandle &handle) if (!scrubber.active) { OSDMapRef curmap = osd->get_osdmap(); scrubber.is_chunky = true; - for (unsigned i=1; i<acting.size(); i++) { - ConnectionRef con = osd->get_con_osd_cluster(acting[i], get_osdmap()->get_epoch()); + assert(backfill_targets.empty()); + for (unsigned i=1; i<actingonly.size(); i++) { + ConnectionRef con = osd->get_con_osd_cluster(actingonly[i], get_osdmap()->get_epoch()); if (!con) continue; if (!con->has_feature(CEPH_FEATURE_CHUNKY_SCRUB)) { - dout(20) << "OSD " << acting[i] + dout(20) << "OSD " << actingonly[i] << " does not support chunky scrubs, falling back to classic" << dendl; scrubber.is_chunky = false; @@ -3365,12 +3404,12 @@ void PG::classic_scrub(ThreadPool::TPHandle &handle) * the primary has done a final scrub (which in turn can only happen if * last_update_applied == info.last_update) */ - scrubber.waiting_on = acting.size(); - scrubber.waiting_on_whom.insert(acting.begin(), acting.end()); + scrubber.waiting_on = actingonly.size(); + scrubber.waiting_on_whom.insert(actingonly.begin(), actingonly.end()); // request maps from replicas - for (unsigned 
i=1; i<acting.size(); i++) { - _request_scrub_map_classic(acting[i], eversion_t()); + for (unsigned i=1; i<actingonly.size(); i++) { + _request_scrub_map_classic(actingonly[i], eversion_t()); } // Unlocks and relocks... @@ -3606,10 +3645,10 @@ void PG::chunky_scrub(ThreadPool::TPHandle &handle) ++scrubber.waiting_on; // request maps from replicas - for (unsigned i=1; i<acting.size(); i++) { - _request_scrub_map(acting[i], scrubber.subset_last_update, + for (unsigned i=1; i<actingonly.size(); i++) { + _request_scrub_map(actingonly[i], scrubber.subset_last_update, scrubber.start, scrubber.end, scrubber.deep); - scrubber.waiting_on_whom.insert(acting[i]); + scrubber.waiting_on_whom.insert(actingonly[i]); ++scrubber.waiting_on; } @@ -3936,14 +3975,14 @@ void PG::_compare_scrubmaps(const map<int,ScrubMap*> &maps, ++scrubber.shallow_errors; else ++scrubber.deep_errors; - errorstream << info.pgid << " osd." << acting[j->first] + errorstream << info.pgid << " osd." << actingonly[j->first] << ": soid " << *k << " " << ss.str() << std::endl; } } else { cur_missing.insert(j->first); ++scrubber.shallow_errors; errorstream << info.pgid - << " osd." << acting[j->first] + << " osd." << actingonly[j->first] << " missing " << *k << std::endl; } } @@ -3967,7 +4006,7 @@ void PG::scrub_compare_maps() // construct authoritative scrub map for type specific scrubbing ScrubMap authmap(scrubber.primary_scrubmap); - if (acting.size() > 1) { + if (actingonly.size() > 1) { dout(10) << "scrub comparing replica scrub maps" << dendl; stringstream ss; @@ -3976,13 +4015,13 @@ void PG::scrub_compare_maps() map<hobject_t, int> authoritative; map<int,ScrubMap *> maps; - dout(2) << "scrub osd." << acting[0] << " has " + dout(2) << "scrub osd." << actingonly[0] << " has " << scrubber.primary_scrubmap.objects.size() << " items" << dendl; maps[0] = &scrubber.primary_scrubmap; - for (unsigned i=1; i<acting.size(); i++) { - dout(2) << "scrub osd." 
<< acting[i] << " has " - << scrubber.received_maps[acting[i]].objects.size() << " items" << dendl; - maps[i] = &scrubber.received_maps[acting[i]]; + for (unsigned i=1; i<actingonly.size(); i++) { + dout(2) << "scrub osd." << actingonly[i] << " has " + << scrubber.received_maps[actingonly[i]].objects.size() << " items" << dendl; + maps[i] = &scrubber.received_maps[actingonly[i]]; } _compare_scrubmaps( @@ -4059,8 +4098,8 @@ void PG::scrub_process_inconsistent() ++j) { repair_object(i->first, &(i->second.first), - acting[*j], - acting[i->second.second]); + actingonly[*j], + actingonly[i->second.second]); ++scrubber.fixed; } } @@ -4070,8 +4109,8 @@ void PG::scrub_process_inconsistent() ++j) { repair_object(i->first, &(i->second.first), - acting[*j], - acting[i->second.second]); + actingonly[*j], + actingonly[i->second.second]); ++scrubber.fixed; } } @@ -4212,8 +4251,9 @@ void PG::share_pg_info() dout(10) << "share_pg_info" << dendl; // share new pg_info_t with replicas - for (unsigned i=1; i<acting.size(); i++) { - int peer = acting[i]; + assert(actingbackfill.size() > 0); + for (unsigned i=1; i<actingbackfill.size(); i++) { + int peer = actingbackfill[i]; if (peer_info.count(i)) { peer_info[i].last_epoch_started = info.last_epoch_started; peer_info[i].history.merge(info.history); @@ -4240,9 +4280,9 @@ void PG::share_pg_log() dout(10) << __func__ << dendl; assert(is_primary()); - vector<int>::const_iterator a = acting.begin(); - assert(a != acting.end()); - vector<int>::const_iterator end = acting.end(); + vector<int>::const_iterator a = actingbackfill.begin(); + assert(a != actingbackfill.end()); + vector<int>::const_iterator end = actingbackfill.end(); while (++a != end) { int peer(*a); pg_missing_t& pmissing(peer_missing[peer]); @@ -4272,8 +4312,8 @@ void PG::update_history_from_master(pg_history_t new_history) void PG::fulfill_info(int from, const pg_query_t &query, pair<int, pg_info_t> &notify_info) { - assert(!acting.empty()); - assert(from == acting[0]); + 
assert(!actingonly.empty()); + assert(from == actingonly[0]); assert(query.type == pg_query_t::INFO); // info @@ -4283,8 +4323,8 @@ void PG::fulfill_info(int from, const pg_query_t &query, void PG::fulfill_log(int from, const pg_query_t &query, epoch_t query_epoch) { - assert(!acting.empty()); - assert(from == acting[0]); + assert(!actingonly.empty()); + assert(from == actingonly[0]); assert(query.type != pg_query_t::INFO); MOSDPGLog *mlog = new MOSDPGLog(get_osdmap()->get_epoch(), @@ -4364,7 +4404,7 @@ bool PG::may_need_replay(const OSDMapRef osdmap) const any_survived_interval = true; } else if (pinfo->up_from <= interval.first && - (std::find(acting.begin(), acting.end(), o) != acting.end() || + (std::find(actingonly.begin(), actingonly.end(), o) != actingonly.end() || std::find(up.begin(), up.end(), o) != up.end())) { dout(10) << "may_need_replay osd." << o << " up_from " << pinfo->up_from << " and is in acting|up," @@ -4407,7 +4447,7 @@ bool PG::is_split(OSDMapRef lastmap, OSDMapRef nextmap) bool PG::acting_up_affected(const vector<int>& newup, const vector<int>& newacting) { - if (acting != newacting || up != newup) { + if (actingonly != newacting || up != newup) { dout(20) << "acting_up_affected newup " << newup << " newacting " << newacting << dendl; return true; } else { @@ -4472,25 +4512,27 @@ void PG::start_peering_interval(const OSDMapRef lastmap, vector<int> oldacting, oldup; int oldrole = get_role(); int oldprimary = get_primary(); - acting.swap(oldacting); + actingonly.swap(oldacting); up.swap(oldup); up = newup; - acting = newacting; + actingonly = newacting; if (info.stats.up != up || - info.stats.acting != acting) { + info.stats.acting != actingonly) { info.stats.up = up; - info.stats.acting = acting; + info.stats.acting = actingonly; info.stats.mapping_epoch = info.history.same_interval_since; } - if (up != acting) + //This will now be remapped during a backfill in cases + //that it would have been before. 
+ if (up != actingonly) state_set(PG_STATE_REMAPPED); else state_clear(PG_STATE_REMAPPED); - int role = osdmap->calc_pg_role(osd->whoami, acting, acting.size()); + int role = osdmap->calc_pg_role(osd->whoami, actingonly, actingonly.size()); set_role(role); // did acting, up, primary|acker change? @@ -4520,7 +4562,7 @@ void PG::start_peering_interval(const OSDMapRef lastmap, } } - if (oldacting != acting || oldup != up || is_split(lastmap, osdmap)) { + if (oldacting != actingonly || oldup != up || is_split(lastmap, osdmap)) { info.history.same_interval_since = osdmap->get_epoch(); } if (oldup != up) { @@ -4531,7 +4573,7 @@ void PG::start_peering_interval(const OSDMapRef lastmap, } dout(10) << " up " << oldup << " -> " << up - << ", acting " << oldacting << " -> " << acting + << ", acting " << oldacting << " -> " << actingonly << ", role " << oldrole << " -> " << role << dendl; // deactivate. @@ -4542,6 +4584,7 @@ void PG::start_peering_interval(const OSDMapRef lastmap, peer_missing.clear(); peer_purged.clear(); + actingbackfill.clear(); // reset primary state? if (oldrole == 0 || get_role() == 0) @@ -4584,7 +4627,7 @@ void PG::start_peering_interval(const OSDMapRef lastmap, // we need to announce send_notify = true; - dout(10) << *this << " " << oldacting << " -> " << acting + dout(10) << *this << " " << oldacting << " -> " << actingonly << ", acting primary " << oldprimary << " -> " << get_primary() << dendl; @@ -4594,7 +4637,7 @@ void PG::start_peering_interval(const OSDMapRef lastmap, // i am (still) primary. but my replica set changed. 
state_clear(PG_STATE_CLEAN); - dout(10) << oldacting << " -> " << acting + dout(10) << oldacting << " -> " << actingonly << ", replicas changed" << dendl; } } @@ -4603,9 +4646,9 @@ void PG::start_peering_interval(const OSDMapRef lastmap, osd->remove_want_pg_temp(info.pgid); cancel_recovery(); - if (acting.empty() && !up.empty() && up[0] == osd->whoami) { + if (actingonly.empty() && !up.empty() && up[0] == osd->whoami) { dout(10) << " acting empty, but i am up[0], clearing pg_temp" << dendl; - osd->queue_want_pg_temp(info.pgid, acting); + osd->queue_want_pg_temp(info.pgid, actingonly); } } @@ -4658,8 +4701,8 @@ ostream& operator<<(ostream& out, const PG& pg) { out << "pg[" << pg.info << " " << pg.up; - if (pg.acting != pg.up) - out << "/" << pg.acting; + if (pg.actingonly != pg.up) + out << "/" << pg.actingonly; out << " r=" << pg.get_role(); out << " lpr=" << pg.get_last_peering_reset(); @@ -5443,7 +5486,7 @@ PG::RecoveryState::WaitRemoteBackfillReserved::WaitRemoteBackfillReserved(my_con PG *pg = context< RecoveryMachine >().pg; pg->state_set(PG_STATE_BACKFILL_WAIT); ConnectionRef con = pg->osd->get_con_osd_cluster( - pg->backfill_target, pg->get_osdmap()->get_epoch()); + pg->get_backfill_target(), pg->get_osdmap()->get_epoch()); if (con) { if (con->has_feature(CEPH_FEATURE_BACKFILL_RESERVATION)) { unsigned priority = pg->is_degraded() ? 
OSDService::BACKFILL_HIGH @@ -5565,7 +5608,7 @@ PG::RecoveryState::RepWaitRecoveryReserved::react(const RemoteRecoveryReserved & { PG *pg = context< RecoveryMachine >().pg; pg->osd->send_message_osd_cluster( - pg->acting[0], + pg->actingonly[0], new MRecoveryReserve( MRecoveryReserve::GRANT, pg->info.pgid, @@ -5625,7 +5668,7 @@ PG::RecoveryState::RepWaitBackfillReserved::react(const RemoteBackfillReserved & { PG *pg = context< RecoveryMachine >().pg; pg->osd->send_message_osd_cluster( - pg->acting[0], + pg->actingonly[0], new MBackfillReserve( MBackfillReserve::GRANT, pg->info.pgid, @@ -5828,11 +5871,12 @@ PG::RecoveryState::Recovered::Recovered(my_context ctx) // if we finished backfill, all acting are active; recheck if // DEGRADED is appropriate. - if (pg->get_osdmap()->get_pg_size(pg->info.pgid) <= pg->acting.size()) + assert(pg->actingbackfill.size() > 0); + if (pg->get_osdmap()->get_pg_size(pg->info.pgid) <= pg->actingbackfill.size()) pg->state_clear(PG_STATE_DEGRADED); // adjust acting set? (e.g. because backfill completed...) 
- if (pg->acting != pg->up && !pg->choose_acting(newest_update_osd)) + if (pg->actingonly != pg->up && !pg->choose_acting(newest_update_osd)) assert(pg->want_acting.size()); assert(!pg->needs_recovery()); @@ -5881,8 +5925,8 @@ void PG::RecoveryState::Clean::exit() PG::RecoveryState::Active::Active(my_context ctx) : my_base(ctx), NamedState(context< RecoveryMachine >().pg->cct, "Started/Primary/Active"), - sorted_acting_set(context< RecoveryMachine >().pg->acting.begin(), - context< RecoveryMachine >().pg->acting.end()), + sorted_acting_set(context< RecoveryMachine >().pg->actingonly.begin(), + context< RecoveryMachine >().pg->actingonly.end()), all_replicas_activated(false) { context< RecoveryMachine >().log_enter(state_name); @@ -5916,8 +5960,8 @@ boost::statechart::result PG::RecoveryState::Active::react(const AdvMap& advmap) for (vector<int>::iterator p = pg->want_acting.begin(); p != pg->want_acting.end(); ++p) { if (!advmap.osdmap->is_up(*p)) { - assert((std::find(pg->acting.begin(), pg->acting.end(), *p) != - pg->acting.end()) || + assert((std::find(pg->actingonly.begin(), pg->actingonly.end(), *p) != + pg->actingonly.end()) || (std::find(pg->up.begin(), pg->up.end(), *p) != pg->up.end())); } @@ -5927,10 +5971,7 @@ boost::statechart::result PG::RecoveryState::Active::react(const AdvMap& advmap) * this does not matter) */ if (advmap.lastmap->get_pg_size(pg->info.pgid) != pg->get_osdmap()->get_pg_size(pg->info.pgid)) { - unsigned active = pg->acting.size(); - if (pg->backfill_target != -1) - --active; - if (pg->get_osdmap()->get_pg_size(pg->info.pgid) <= active) + if (pg->get_osdmap()->get_pg_size(pg->info.pgid) <= pg->actingonly.size()) pg->state_clear(PG_STATE_DEGRADED); else pg->state_set(PG_STATE_DEGRADED); @@ -6017,10 +6058,11 @@ boost::statechart::result PG::RecoveryState::Active::react(const MInfoRec& infoe assert(pg->is_active()); assert(pg->is_primary()); + assert(pg->actingbackfill.size() > 0); // don't update history (yet) if we are active and 
primary; the replica // may be telling us they have activated (and committed) but we can't // share that until _everyone_ does the same. - if (pg->is_acting(infoevt.from)) { + if (pg->is_actingbackfill(infoevt.from)) { assert(pg->info.history.last_epoch_started < pg->info.history.same_interval_since); assert(infoevt.info.history.last_epoch_started >= @@ -6030,7 +6072,7 @@ boost::statechart::result PG::RecoveryState::Active::react(const MInfoRec& infoe pg->peer_activated.insert(infoevt.from); } - if (pg->peer_activated.size() == pg->acting.size()) { + if (pg->peer_activated.size() == pg->actingbackfill.size()) { pg->all_activated_and_committed(); } return discard_event(); @@ -6535,7 +6577,9 @@ PG::RecoveryState::GetLog::GetLog(my_context ctx) // how much log to request? eversion_t request_log_from = pg->info.last_update; - for (vector<int>::iterator p = pg->acting.begin() + 1; p != pg->acting.end(); ++p) { + assert(pg->actingbackfill.size() > 0); + for (vector<int>::iterator p = pg->actingbackfill.begin() + 1; + p != pg->actingbackfill.end(); ++p) { pg_info_t& ri = pg->peer_info[*p]; if (ri.last_update >= best.log_tail && ri.last_update < request_log_from) request_log_from = ri.last_update; @@ -6719,8 +6763,9 @@ PG::RecoveryState::GetMissing::GetMissing(my_context ctx) context< RecoveryMachine >().log_enter(state_name); PG *pg = context< RecoveryMachine >().pg; - for (vector<int>::iterator i = pg->acting.begin() + 1; - i != pg->acting.end(); + assert(pg->actingbackfill.size() > 0); + for (vector<int>::iterator i = pg->actingbackfill.begin() + 1; + i != pg->actingbackfill.end(); ++i) { const pg_info_t& pi = pg->peer_info[*i]; diff --git a/src/osd/PG.h b/src/osd/PG.h index dc11638fd4b..9b13f834080 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -333,7 +333,7 @@ public: // primary state public: - vector<int> up, acting, want_acting; + vector<int> up, actingonly, want_acting, actingbackfill; map<int,eversion_t> peer_last_complete_ondisk; eversion_t 
min_last_complete_ondisk; // up: min over last_complete_ondisk, peer_last_complete_ondisk eversion_t pg_trim_to; @@ -504,14 +504,18 @@ protected: BackfillInterval backfill_info; BackfillInterval peer_backfill_info; - int backfill_target; + vector<int> backfill_targets; bool backfill_reserved; bool backfill_reserving; friend class OSD; public: + //Compatibility with single backfill target code int get_backfill_target() const { + int backfill_target = -1; + if (backfill_targets.size() > 0) + backfill_target = backfill_targets[0]; return backfill_target; } @@ -556,16 +560,24 @@ public: void clear_primary_state(); public: +#if 0 + //This function not used bool is_acting(int osd) const { - for (unsigned i=0; i<acting.size(); i++) - if (acting[i] == osd) return true; + for (unsigned i=0; i<actingonly.size(); i++) + if (actingonly[i] == osd) return true; return false; } +#endif bool is_up(int osd) const { for (unsigned i=0; i<up.size(); i++) if (up[i] == osd) return true; return false; } + bool is_actingbackfill(int osd) const { + for (unsigned i=0; i<actingbackfill.size(); i++) + if (actingbackfill[i] == osd) return true; + return false; + } bool needs_recovery() const; bool needs_backfill() const; @@ -587,10 +599,11 @@ public: bool calc_min_last_complete_ondisk() { eversion_t min = last_complete_ondisk; - for (unsigned i=1; i<acting.size(); i++) { - if (peer_last_complete_ondisk.count(acting[i]) == 0) + assert(actingbackfill.size() > 0); + for (unsigned i=1; i<actingbackfill.size(); i++) { + if (peer_last_complete_ondisk.count(actingbackfill[i]) == 0) return false; // we don't have complete info - eversion_t a = peer_last_complete_ondisk[acting[i]]; + eversion_t a = peer_last_complete_ondisk[actingbackfill[i]]; if (a < min) min = a; } @@ -622,7 +635,7 @@ public: void trim_write_ahead(); map<int, pg_info_t>::const_iterator find_best_info(const map<int, pg_info_t> &infos) const; - bool calc_acting(int& newest_update_osd, vector<int>& want) const; + bool calc_acting(int& 
newest_update_osd, vector<int>& want, vector<int>& backfill) const; bool choose_acting(int& newest_update_osd); void build_might_have_unfound(); void replay_queued_ops(); @@ -1637,9 +1650,13 @@ public: public: pg_t get_pgid() const { return info.pgid; } +#if 0 + //Not used int get_nrep() const { return acting.size(); } +#endif - int get_primary() { return acting.empty() ? -1:acting[0]; } + // Rename actingonly -> acting + int get_primary() { return actingonly.empty() ? -1:actingonly[0]; } int get_role() const { return role; } void set_role(int r) { role = r; } diff --git a/src/osd/PGBackend.h b/src/osd/PGBackend.h index 408c589a08a..856a541271c 100644 --- a/src/osd/PGBackend.h +++ b/src/osd/PGBackend.h @@ -96,6 +96,7 @@ virtual void queue_transaction(ObjectStore::Transaction *t) = 0; virtual epoch_t get_epoch() = 0; virtual const vector<int> &get_acting() = 0; + virtual const vector<int> &get_actingbackfill() = 0; virtual std::string gen_dbg_prefix() const = 0; virtual const map<hobject_t, set<int> > &get_missing_loc() = 0; diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index fd4ffb77485..68e080c797a 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -222,7 +222,7 @@ void ReplicatedPG::on_peer_recover( publish_stats_to_osd(); // done! 
peer_missing[peer].got(soid, recovery_info.version); - if (peer == backfill_target && backfills_in_flight.count(soid)) + if (peer == get_backfill_target() && backfills_in_flight.count(soid)) backfills_in_flight.erase(soid); } @@ -297,15 +297,16 @@ bool ReplicatedPG::is_degraded_object(const hobject_t& soid) { if (pg_log.get_missing().missing.count(soid)) return true; - for (unsigned i = 1; i < acting.size(); i++) { - int peer = acting[i]; + assert(actingbackfill.size() > 0); + for (unsigned i = 1; i < actingbackfill.size(); i++) { + int peer = actingbackfill[i]; if (peer_missing.count(peer) && peer_missing[peer].missing.count(soid)) return true; // Object is degraded if after last_backfill AND // we have are backfilling it - if (peer == backfill_target && + if (peer == get_backfill_target() && peer_info[peer].last_backfill <= soid && backfill_pos >= soid && backfills_in_flight.count(soid)) @@ -330,8 +331,9 @@ void ReplicatedPG::wait_for_degraded_object(const hobject_t& soid, OpRequestRef << ", recovering" << dendl; eversion_t v; - for (unsigned i = 1; i < acting.size(); i++) { - int peer = acting[i]; + assert(actingbackfill.size() > 0); + for (unsigned i = 1; i < actingbackfill.size(); i++) { + int peer = actingbackfill[i]; if (peer_missing.count(peer) && peer_missing[peer].missing.count(soid)) { v = peer_missing[peer].missing[soid].need; @@ -461,9 +463,21 @@ int ReplicatedPG::do_command(cmdmap_t cmdmap, ostream& ss, f->dump_unsigned("osd", *p); f->close_section(); f->open_array_section("acting"); - for (vector<int>::iterator p = acting.begin(); p != acting.end(); ++p) + for (vector<int>::iterator p = actingonly.begin(); p != actingonly.end(); ++p) f->dump_unsigned("osd", *p); f->close_section(); + if (backfill_targets.size() > 0) { + f->open_array_section("backfill_targets"); + for (vector<int>::iterator p = backfill_targets.begin(); p != backfill_targets.end(); ++p) + f->dump_unsigned("osd", *p); + f->close_section(); + } + if (actingbackfill.size() > 0) { + 
f->open_array_section("actingbackfill"); + for (vector<int>::iterator p = actingbackfill.begin(); p != actingbackfill.end(); ++p) + f->dump_unsigned("osd", *p); + f->close_section(); + } f->open_object_section("info"); info.dump(f.get()); f->close_section(); @@ -1005,6 +1019,7 @@ void ReplicatedPG::do_op(OpRequestRef op) // opposite is not a problem; if the target is after the line, we // don't apply on the backfill_target and it doesn't matter.) pg_info_t *backfill_target_info = NULL; + int backfill_target = get_backfill_target(); bool before_backfill = false; if (backfill_target >= 0) { backfill_target_info = &peer_info[backfill_target]; @@ -1568,7 +1583,7 @@ void ReplicatedPG::do_scan( case MOSDPGScan::OP_SCAN_DIGEST: { int from = m->get_source().num(); - assert(from == backfill_target); + assert(from == get_backfill_target()); BackfillInterval& bi = peer_backfill_info; bi.begin = m->begin; bi.end = m->end; @@ -4284,6 +4299,7 @@ int ReplicatedPG::prepare_transaction(OpContext *ctx) ctx->obc->ssc->snapset = ctx->new_snapset; info.stats.stats.add(ctx->delta_stats, ctx->obc->obs.oi.category); + int backfill_target = get_backfill_target(); if (backfill_target >= 0) { pg_info_t& pinfo = peer_info[backfill_target]; if (soid < pinfo.last_backfill) @@ -4861,18 +4877,19 @@ void ReplicatedPG::issue_repop(RepGather *repop, utime_t now) repop->v = ctx->at_version; // add myself to gather set - repop->waitfor_ack.insert(acting[0]); - repop->waitfor_disk.insert(acting[0]); + repop->waitfor_ack.insert(actingonly[0]); + repop->waitfor_disk.insert(actingonly[0]); int acks_wanted = CEPH_OSD_FLAG_ACK | CEPH_OSD_FLAG_ONDISK; - if (ctx->op && acting.size() > 1) { + assert(actingbackfill.size() > 0); + if (ctx->op && actingbackfill.size() > 1) { ostringstream ss; - ss << "waiting for subops from " << vector<int>(acting.begin() + 1, acting.end()); + ss << "waiting for subops from " << vector<int>(actingbackfill.begin() + 1, actingbackfill.end()); ctx->op->mark_sub_op_sent(ss.str()); } 
- for (unsigned i=1; i<acting.size(); i++) { - int peer = acting[i]; + for (unsigned i=1; i<actingbackfill.size(); i++) { + int peer = actingbackfill[i]; pg_info_t &pinfo = peer_info[peer]; repop->waitfor_ack.insert(peer); @@ -4889,6 +4906,7 @@ void ReplicatedPG::issue_repop(RepGather *repop, utime_t now) assert(0 == "broken implementation, do not use"); } + int backfill_target = get_backfill_target(); // ship resulting transaction, log entries, and pg_stats if (peer == backfill_target && soid >= backfill_pos && soid.pool == (int64_t)info.pgid.pool()) { // only skip normal (not temp pool=-1) objects @@ -5532,7 +5550,7 @@ void ReplicatedPG::sub_op_modify(OpRequestRef op) // we better not be missing this. assert(!pg_log.get_missing().is_missing(soid)); - int ackerosd = acting[0]; + int ackerosd = actingonly[0]; op->mark_started(); @@ -7038,10 +7056,11 @@ eversion_t ReplicatedPG::pick_newest_available(const hobject_t& oid) v = pg_log.get_missing().missing.find(oid)->second.have; dout(10) << "pick_newest_available " << oid << " " << v << " on osd." << osd->whoami << " (local)" << dendl; - for (unsigned i=1; i<acting.size(); ++i) { - int peer = acting[i]; + assert(actingbackfill.size() > 0); + for (unsigned i=1; i<actingbackfill.size(); ++i) { + int peer = actingbackfill[i]; if (!peer_missing[peer].is_missing(oid)) { - assert(peer == backfill_target); + assert(peer == get_backfill_target()); continue; } eversion_t h = peer_missing[peer].missing[oid].have; @@ -7307,17 +7326,16 @@ void ReplicatedPG::on_shutdown() cancel_recovery(); } +//XXX: For now only care about a single backfill at a time void ReplicatedPG::on_activate() { - for (unsigned i = 1; i<acting.size(); i++) { - if (peer_info[acting[i]].last_backfill != hobject_t::get_max()) { - assert(backfill_target == -1); - backfill_target = acting[i]; - backfill_pos = peer_info[acting[i]].last_backfill; - dout(10) << " chose backfill target osd." 
<< backfill_target - << " from " << backfill_pos << dendl; - } - } + int backfill_target = get_backfill_target(); + if (backfill_target == -1) + return; + backfill_pos = peer_info[backfill_target].last_backfill; + assert(backfill_pos != hobject_t::get_max()); + dout(10) << " chose backfill target osd." << backfill_target + << " from " << backfill_pos << dendl; } void ReplicatedPG::on_change(ObjectStore::Transaction *t) @@ -7512,6 +7530,7 @@ int ReplicatedPG::start_recovery_ops( } bool deferred_backfill = false; + int backfill_target = get_backfill_target(); if (recovering.empty() && state_test(PG_STATE_BACKFILL) && backfill_target >= 0 && started < max && @@ -7766,6 +7785,7 @@ int ReplicatedPG::prep_object_replica_pushes( const hobject_t& soid, eversion_t v, PGBackend::RecoveryHandle *h) { + assert(is_primary()); dout(10) << __func__ << ": on " << soid << dendl; // NOTE: we know we will get a valid oloc off of disk here. @@ -7773,8 +7793,9 @@ int ReplicatedPG::prep_object_replica_pushes( if (!obc) { pg_log.missing_add(soid, v, eversion_t()); bool uhoh = true; - for (unsigned i=1; i<acting.size(); i++) { - int peer = acting[i]; + assert(actingbackfill.size() > 0); + for (unsigned i=1; i<actingbackfill.size(); i++) { + int peer = actingbackfill[i]; if (!peer_missing[peer].is_missing(soid, v)) { missing_loc[soid].insert(peer); missing_loc_sources.insert(peer); @@ -7817,8 +7838,9 @@ int ReplicatedBackend::start_pushes( { int pushes = 0; // who needs it? 
- for (unsigned i=1; i<get_parent()->get_acting().size(); i++) { - int peer = get_parent()->get_acting()[i]; + assert(get_parent()->get_actingbackfill().size() > 0); + for (unsigned i=1; i<get_parent()->get_actingbackfill().size(); i++) { + int peer = get_parent()->get_actingbackfill()[i]; map<int, pg_missing_t>::const_iterator j = get_parent()->get_peer_missing().find(peer); assert(j != get_parent()->get_peer_missing().end()); @@ -7841,8 +7863,9 @@ int ReplicatedPG::recover_replicas(int max, ThreadPool::TPHandle &handle) PGBackend::RecoveryHandle *h = pgbackend->open_recovery_op(); // this is FAR from an optimal recovery order. pretty lame, really. - for (unsigned i=1; i<acting.size(); i++) { - int peer = acting[i]; + assert(actingbackfill.size() > 0); + for (unsigned i=1; i<actingbackfill.size(); i++) { + int peer = actingbackfill[i]; map<int, pg_missing_t>::const_iterator pm = peer_missing.find(peer); assert(pm != peer_missing.end()); map<int, pg_info_t>::const_iterator pi = peer_info.find(peer); @@ -7919,6 +7942,7 @@ int ReplicatedPG::recover_backfill( ThreadPool::TPHandle &handle) { dout(10) << "recover_backfill (" << max << ")" << dendl; + int backfill_target = get_backfill_target(); assert(backfill_target >= 0); pg_info_t& pinfo = peer_info[backfill_target]; @@ -8106,7 +8130,7 @@ void ReplicatedPG::prep_backfill_object_push( dout(10) << "push_backfill_object " << oid << " v " << v << " to osd." 
<< peer << dendl; backfills_in_flight.insert(oid); - map<int, pg_missing_t>::iterator bpm = peer_missing.find(backfill_target); + map<int, pg_missing_t>::iterator bpm = peer_missing.find(get_backfill_target()); assert(bpm != peer_missing.end()); bpm->second.add(oid, eversion_t(), eversion_t()); diff --git a/src/osd/ReplicatedPG.h b/src/osd/ReplicatedPG.h index 00216170516..77211fef474 100644 --- a/src/osd/ReplicatedPG.h +++ b/src/osd/ReplicatedPG.h @@ -279,8 +279,12 @@ public: epoch_t get_epoch() { return get_osdmap()->get_epoch(); } + //Not used const vector<int> &get_acting() { - return acting; + return actingonly; + } + const vector<int> &get_actingbackfill() { + return actingbackfill; } std::string gen_dbg_prefix() const { return gen_prefix(); } |