diff options
author | Sage Weil <sage@inktank.com> | 2012-11-29 15:47:26 -0800 |
---|---|---|
committer | Sage Weil <sage@inktank.com> | 2012-11-29 15:47:26 -0800 |
commit | 77711ddee37de154c0d5d452c4f84dae36eb3e3a (patch) | |
tree | 9ccf8834d8c135cac4c8509aa91334df12d734a0 | |
parent | ef39773c18fddfddaceebecec608173323888e82 (diff) | |
parent | 5c8cbd28207195b094799a7bdbad0019669682a8 (diff) | |
download | ceph-77711ddee37de154c0d5d452c4f84dae36eb3e3a.tar.gz |
Merge remote-tracking branch 'gh/wip_next_bugs' into next
-rw-r--r-- | src/os/hobject.h | 17 | ||||
-rw-r--r-- | src/osd/OSD.cc | 12 | ||||
-rw-r--r-- | src/osd/PG.cc | 23 | ||||
-rw-r--r-- | src/osd/ReplicatedPG.cc | 16 | ||||
-rw-r--r-- | src/osd/osd_types.cc | 11 | ||||
-rw-r--r-- | src/osd/osd_types.h | 6 | ||||
-rw-r--r-- | src/test/filestore/store_test.cc | 6 |
7 files changed, 64 insertions, 27 deletions
diff --git a/src/os/hobject.h b/src/os/hobject.h index 9a1c207e796..d75ae8570c4 100644 --- a/src/os/hobject.h +++ b/src/os/hobject.h @@ -31,7 +31,9 @@ struct hobject_t { object_t oid; snapid_t snap; uint32_t hash; +private: bool max; +public: int64_t pool; string nspace; @@ -57,6 +59,15 @@ public: pool(pool), key(soid.oid.name == key ? string() : key) {} + /// @return min hobject_t ret s.t. ret.hash == this->hash + hobject_t get_boundary() const { + if (is_max()) + return *this; + hobject_t ret; + ret.hash = hash; + return ret; + } + /* Do not use when a particular hash function is needed */ explicit hobject_t(const sobject_t &o) : oid(o.oid), snap(o.snap), max(false), pool(-1) { @@ -108,6 +119,12 @@ public: void decode(json_spirit::Value& v); void dump(Formatter *f) const; static void generate_test_instances(list<hobject_t*>& o); + friend bool operator<(const hobject_t&, const hobject_t&); + friend bool operator>(const hobject_t&, const hobject_t&); + friend bool operator<=(const hobject_t&, const hobject_t&); + friend bool operator>=(const hobject_t&, const hobject_t&); + friend bool operator==(const hobject_t&, const hobject_t&); + friend bool operator!=(const hobject_t&, const hobject_t&); }; WRITE_CLASS_ENCODER(hobject_t) diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 96052820a15..913157a8508 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -1307,15 +1307,6 @@ PG *OSD::_create_lock_pg( t.create_collection(coll_t(pgid)); - if (newly_created) { - /* This is weird, but all the peering code needs last_epoch_start - * to be less than same_interval_since. Make it so! - * This is easier to deal with if you remember that the PG, while - * now created in memory, still hasn't peered and started -- and - * the map epoch could change before that happens! */ - history.last_epoch_started = history.epoch_created - 1; - } - pg->init(role, up, acting, history, pi, &t); dout(7) << "_create_lock_pg " << *pg << dendl; @@ -1586,6 +1577,8 @@ PG *OSD::get_or_create_pg(const pg_info_t& info, pg_interval_map_t& pi, if (!_have_pg(info.pgid)) { // same primary? + if (!osdmap->have_pg_pool(info.pgid.pool())) + return 0; vector<int> up, acting; osdmap->pg_to_up_acting_osds(info.pgid, up, acting); int role = osdmap->calc_pg_role(whoami, acting, acting.size()); @@ -4502,6 +4495,7 @@ void OSD::handle_pg_create(OpRequestRef op) 0, creating_pgs[pgid].acting, creating_pgs[pgid].acting, history, pi, *rctx.transaction); + pg->info.last_epoch_started = pg->info.history.last_epoch_started; creating_pgs.erase(pgid); wake_pg_waiters(pg->info.pgid); pg->handle_create(&rctx); diff --git a/src/osd/PG.cc b/src/osd/PG.cc index 53e9ae4bb19..b81bfed36f4 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -1042,11 +1042,11 @@ map<int, pg_info_t>::const_iterator PG::find_best_info(const map<int, pg_info_t> for (map<int, pg_info_t>::const_iterator i = infos.begin(); i != infos.end(); ++i) { - if (max_last_epoch_started_found < i->second.history.last_epoch_started) { + if (max_last_epoch_started_found < i->second.last_epoch_started) { min_last_update_acceptable = eversion_t::max(); - max_last_epoch_started_found = i->second.history.last_epoch_started; + max_last_epoch_started_found = i->second.last_epoch_started; } - if (max_last_epoch_started_found == i->second.history.last_epoch_started) { + if (max_last_epoch_started_found == i->second.last_epoch_started) { if (min_last_update_acceptable > i->second.last_update) min_last_update_acceptable = i->second.last_update; } @@ -1381,6 +1381,8 @@ void PG::activate(ObjectStore::Transaction& t, send_notify = false; + info.last_epoch_started = query_epoch; + if (is_primary()) { // If necessary, create might_have_unfound to help us find our unfound objects. // NOTE: It's important that we build might_have_unfound before trimming the @@ -1774,7 +1776,8 @@ void PG::all_activated_and_committed() assert(is_primary()); assert(peer_activated.size() == acting.size()); - info.history.last_epoch_started = get_osdmap()->get_epoch(); + // info.last_epoch_started is set during activate() + info.history.last_epoch_started = info.last_epoch_started; share_pg_info(); update_stats(); @@ -3691,7 +3694,7 @@ void PG::chunky_scrub() { // search backward from the end looking for a boundary objects.push_back(scrubber.end); while (!boundary_found && objects.size() > 1) { - hobject_t end = objects.back(); + hobject_t end = objects.back().get_boundary(); objects.pop_back(); if (objects.back().get_filestore_key() != end.get_filestore_key()) { @@ -4134,6 +4137,10 @@ void PG::share_pg_info() // share new pg_info_t with replicas for (unsigned i=1; i<acting.size(); i++) { int peer = acting[i]; + if (peer_info.count(i)) { + peer_info[i].last_epoch_started = info.last_epoch_started; + peer_info[i].history.merge(info.history); + } MOSDPGInfo *m = new MOSDPGInfo(get_osdmap()->get_epoch()); m->pg_list.push_back( make_pair( @@ -4523,6 +4530,10 @@ void PG::proc_primary_info(ObjectStore::Transaction &t, const pg_info_t &oinfo) dirty_info = true; osd->reg_last_pg_scrub(info.pgid, info.history.last_scrub_stamp); + assert(oinfo.last_epoch_started == info.last_epoch_started); + assert(info.history.last_epoch_started == oinfo.last_epoch_started); + assert(oinfo.history.last_epoch_started == oinfo.last_epoch_started); + // Handle changes to purged_snaps ONLY IF we have caught up if (last_complete_ondisk.epoch >= info.history.last_epoch_started) { interval_set<snapid_t> p; @@ -6451,7 +6462,7 @@ PG::RecoveryState::GetMissing::GetMissing(my_context ctx) // We pull the log from the peer's last_epoch_started to ensure we // get enough log to detect divergent updates. - eversion_t since(pi.history.last_epoch_started, 0); + eversion_t since(pi.last_epoch_started, 0); assert(pi.last_update >= pg->info.log_tail); // or else choose_acting() did a bad thing if (pi.log_tail <= since) { dout(10) << " requesting log+missing since " << since << " from osd." << *i << dendl; diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index 2513d9d6fe4..388751e8e8b 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -7168,10 +7168,15 @@ boost::statechart::result ReplicatedPG::NotTrimming::react(const SnapTrim&) dout(10) << "NotTrimming: obs_to_trim empty!" << dendl; dout(10) << "purged_snaps now " << pg->info.purged_snaps << ", snap_trimq now " << pg->snap_trimq << dendl; - ObjectStore::Transaction *t = new ObjectStore::Transaction; - t->remove_collection(col_to_trim); - int r = pg->osd->store->queue_transaction(NULL, t, new ObjectStore::C_DeleteTransaction(t)); - assert(r == 0); + if (pg->snap_collections.contains(snap_to_trim)) { + ObjectStore::Transaction *t = new ObjectStore::Transaction; + pg->snap_collections.erase(snap_to_trim); + t->remove_collection(col_to_trim); + pg->write_info(*t); + int r = pg->osd->store->queue_transaction( + NULL, t, new ObjectStore::C_DeleteTransaction(t)); + assert(r == 0); + } post_event(SnapTrim()); return discard_event(); } else { @@ -7222,9 +7227,10 @@ boost::statechart::result ReplicatedPG::RepColTrim::react(const SnapTrim&) t->collection_remove(col_to_trim, *i); } t->remove_collection(col_to_trim); + pg->snap_collections.erase(snap_to_trim); + pg->write_info(*t); int r = pg->osd->store->queue_transaction(NULL, t, new ObjectStore::C_DeleteTransaction(t)); assert(r == 0); - pg->snap_collections.erase(snap_to_trim); return discard_event(); } diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc index 1ca5adc70be..4a1b3fcf2ef 100644 --- a/src/osd/osd_types.cc +++ b/src/osd/osd_types.cc @@ -1286,7 +1286,7 @@ void pg_history_t::generate_test_instances(list<pg_history_t*>& o) void pg_info_t::encode(bufferlist &bl) const { - ENCODE_START(26, 26, bl); + ENCODE_START(27, 26, bl); ::encode(pgid, bl); ::encode(last_update, bl); ::encode(last_complete, bl); @@ -1295,12 +1295,13 @@ void pg_info_t::encode(bufferlist &bl) const ::encode(stats, bl); history.encode(bl); ::encode(purged_snaps, bl); + ::encode(last_epoch_started, bl); ENCODE_FINISH(bl); } void pg_info_t::decode(bufferlist::iterator &bl) { - DECODE_START_LEGACY_COMPAT_LEN(26, 26, 26, bl); + DECODE_START_LEGACY_COMPAT_LEN(27, 26, 26, bl); if (struct_v < 23) { old_pg_t opgid; ::decode(opgid, bl); @@ -1325,6 +1326,11 @@ void pg_info_t::decode(bufferlist::iterator &bl) set<snapid_t> snap_trimq; ::decode(snap_trimq, bl); } + if (struct_v < 27) { + last_epoch_started = history.last_epoch_started; + } else { + ::decode(last_epoch_started, bl); + } DECODE_FINISH(bl); } @@ -1348,6 +1354,7 @@ void pg_info_t::dump(Formatter *f) const f->dump_int("empty", is_empty()); f->dump_int("dne", dne()); f->dump_int("incomplete", is_incomplete()); + f->dump_int("last_epoch_started", last_epoch_started); } void pg_info_t::generate_test_instances(list<pg_info_t*>& o) diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index bb2ed253ce6..da2b2abf319 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -1039,6 +1039,7 @@ struct pg_info_t { pg_t pgid; eversion_t last_update; // last object version applied to store. eversion_t last_complete; // last version pg was complete through. + epoch_t last_epoch_started;// last epoch at which this pg started on this osd eversion_t log_tail; // oldest log entry. @@ -1051,11 +1052,11 @@ struct pg_info_t { pg_history_t history; pg_info_t() - : last_backfill(hobject_t::get_max()) + : last_epoch_started(0), last_backfill(hobject_t::get_max()) { } pg_info_t(pg_t p) : pgid(p), - last_backfill(hobject_t::get_max()) + last_epoch_started(0), last_backfill(hobject_t::get_max()) { } bool is_empty() const { return last_update.version == 0; } @@ -1086,6 +1087,7 @@ inline ostream& operator<<(ostream& out, const pg_info_t& pgi) out << " lb " << pgi.last_backfill; } //out << " c " << pgi.epoch_created; + out << " local-les=" << pgi.last_epoch_started; out << " n=" << pgi.stats.stats.sum.num_objects; out << " " << pgi.history << ")"; diff --git a/src/test/filestore/store_test.cc b/src/test/filestore/store_test.cc index 55fde77f0fc..3a41fa10e4c 100644 --- a/src/test/filestore/store_test.cc +++ b/src/test/filestore/store_test.cc @@ -212,7 +212,7 @@ TEST_F(StoreTest, ManyObjectTest) { ASSERT_EQ(r, 0); listed.insert(objects.begin(), objects.end()); if (objects.size() < 50) { - ASSERT_TRUE(next.max); + ASSERT_TRUE(next.is_max()); break; } objects.clear(); @@ -385,7 +385,7 @@ public: ASSERT_TRUE(sorted(objects)); objects_set.insert(objects.begin(), objects.end()); objects.clear(); - if (next.max) break; + if (next.is_max()) break; current = next; } ASSERT_EQ(objects_set.size(), available_objects.size()); @@ -529,7 +529,7 @@ TEST_F(StoreTest, HashCollisionTest) { listed.insert(*i); } if (objects.size() < 50) { - ASSERT_TRUE(next.max); + ASSERT_TRUE(next.is_max()); break; } objects.clear(); |