summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Sage Weil <sage@inktank.com> 2012-11-29 15:47:26 -0800
committer Sage Weil <sage@inktank.com> 2012-11-29 15:47:26 -0800
commit 77711ddee37de154c0d5d452c4f84dae36eb3e3a (patch)
tree 9ccf8834d8c135cac4c8509aa91334df12d734a0
parent ef39773c18fddfddaceebecec608173323888e82 (diff)
parent 5c8cbd28207195b094799a7bdbad0019669682a8 (diff)
download ceph-77711ddee37de154c0d5d452c4f84dae36eb3e3a.tar.gz
Merge remote-tracking branch 'gh/wip_next_bugs' into next
-rw-r--r-- src/os/hobject.h 17
-rw-r--r-- src/osd/OSD.cc 12
-rw-r--r-- src/osd/PG.cc 23
-rw-r--r-- src/osd/ReplicatedPG.cc 16
-rw-r--r-- src/osd/osd_types.cc 11
-rw-r--r-- src/osd/osd_types.h 6
-rw-r--r-- src/test/filestore/store_test.cc 6
7 files changed, 64 insertions, 27 deletions
diff --git a/src/os/hobject.h b/src/os/hobject.h
index 9a1c207e796..d75ae8570c4 100644
--- a/src/os/hobject.h
+++ b/src/os/hobject.h
@@ -31,7 +31,9 @@ struct hobject_t {
object_t oid;
snapid_t snap;
uint32_t hash;
+private:
bool max;
+public:
int64_t pool;
string nspace;
@@ -57,6 +59,15 @@ public:
pool(pool),
key(soid.oid.name == key ? string() : key) {}
+ /// @return min hobject_t ret s.t. ret.hash == this->hash; when is_max() holds, *this is returned unchanged
+ hobject_t get_boundary() const {
+ if (is_max())
+ return *this; // is_max() objects are handed back as-is, no new object is built
+ hobject_t ret; // default-constructed; only the hash field is overwritten below
+ ret.hash = hash;
+ return ret;
+ }
+
/* Do not use when a particular hash function is needed */
explicit hobject_t(const sobject_t &o) :
oid(o.oid), snap(o.snap), max(false), pool(-1) {
@@ -108,6 +119,12 @@ public:
void decode(json_spirit::Value& v);
void dump(Formatter *f) const;
static void generate_test_instances(list<hobject_t*>& o);
+ friend bool operator<(const hobject_t&, const hobject_t&);
+ friend bool operator>(const hobject_t&, const hobject_t&);
+ friend bool operator<=(const hobject_t&, const hobject_t&);
+ friend bool operator>=(const hobject_t&, const hobject_t&);
+ friend bool operator==(const hobject_t&, const hobject_t&);
+ friend bool operator!=(const hobject_t&, const hobject_t&);
};
WRITE_CLASS_ENCODER(hobject_t)
diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc
index 96052820a15..913157a8508 100644
--- a/src/osd/OSD.cc
+++ b/src/osd/OSD.cc
@@ -1307,15 +1307,6 @@ PG *OSD::_create_lock_pg(
t.create_collection(coll_t(pgid));
- if (newly_created) {
- /* This is weird, but all the peering code needs last_epoch_start
- * to be less than same_interval_since. Make it so!
- * This is easier to deal with if you remember that the PG, while
- * now created in memory, still hasn't peered and started -- and
- * the map epoch could change before that happens! */
- history.last_epoch_started = history.epoch_created - 1;
- }
-
pg->init(role, up, acting, history, pi, &t);
dout(7) << "_create_lock_pg " << *pg << dendl;
@@ -1586,6 +1577,8 @@ PG *OSD::get_or_create_pg(const pg_info_t& info, pg_interval_map_t& pi,
if (!_have_pg(info.pgid)) {
// same primary?
+ if (!osdmap->have_pg_pool(info.pgid.pool()))
+ return 0;
vector<int> up, acting;
osdmap->pg_to_up_acting_osds(info.pgid, up, acting);
int role = osdmap->calc_pg_role(whoami, acting, acting.size());
@@ -4502,6 +4495,7 @@ void OSD::handle_pg_create(OpRequestRef op)
0, creating_pgs[pgid].acting, creating_pgs[pgid].acting,
history, pi,
*rctx.transaction);
+ pg->info.last_epoch_started = pg->info.history.last_epoch_started;
creating_pgs.erase(pgid);
wake_pg_waiters(pg->info.pgid);
pg->handle_create(&rctx);
diff --git a/src/osd/PG.cc b/src/osd/PG.cc
index 53e9ae4bb19..b81bfed36f4 100644
--- a/src/osd/PG.cc
+++ b/src/osd/PG.cc
@@ -1042,11 +1042,11 @@ map<int, pg_info_t>::const_iterator PG::find_best_info(const map<int, pg_info_t>
for (map<int, pg_info_t>::const_iterator i = infos.begin();
i != infos.end();
++i) {
- if (max_last_epoch_started_found < i->second.history.last_epoch_started) {
+ if (max_last_epoch_started_found < i->second.last_epoch_started) {
min_last_update_acceptable = eversion_t::max();
- max_last_epoch_started_found = i->second.history.last_epoch_started;
+ max_last_epoch_started_found = i->second.last_epoch_started;
}
- if (max_last_epoch_started_found == i->second.history.last_epoch_started) {
+ if (max_last_epoch_started_found == i->second.last_epoch_started) {
if (min_last_update_acceptable > i->second.last_update)
min_last_update_acceptable = i->second.last_update;
}
@@ -1381,6 +1381,8 @@ void PG::activate(ObjectStore::Transaction& t,
send_notify = false;
+ info.last_epoch_started = query_epoch;
+
if (is_primary()) {
// If necessary, create might_have_unfound to help us find our unfound objects.
// NOTE: It's important that we build might_have_unfound before trimming the
@@ -1774,7 +1776,8 @@ void PG::all_activated_and_committed()
assert(is_primary());
assert(peer_activated.size() == acting.size());
- info.history.last_epoch_started = get_osdmap()->get_epoch();
+ // info.last_epoch_started is set during activate()
+ info.history.last_epoch_started = info.last_epoch_started;
share_pg_info();
update_stats();
@@ -3691,7 +3694,7 @@ void PG::chunky_scrub() {
// search backward from the end looking for a boundary
objects.push_back(scrubber.end);
while (!boundary_found && objects.size() > 1) {
- hobject_t end = objects.back();
+ hobject_t end = objects.back().get_boundary();
objects.pop_back();
if (objects.back().get_filestore_key() != end.get_filestore_key()) {
@@ -4134,6 +4137,10 @@ void PG::share_pg_info()
// share new pg_info_t with replicas
for (unsigned i=1; i<acting.size(); i++) {
int peer = acting[i];
+ if (peer_info.count(i)) {
+ peer_info[i].last_epoch_started = info.last_epoch_started;
+ peer_info[i].history.merge(info.history);
+ }
MOSDPGInfo *m = new MOSDPGInfo(get_osdmap()->get_epoch());
m->pg_list.push_back(
make_pair(
@@ -4523,6 +4530,10 @@ void PG::proc_primary_info(ObjectStore::Transaction &t, const pg_info_t &oinfo)
dirty_info = true;
osd->reg_last_pg_scrub(info.pgid, info.history.last_scrub_stamp);
+ assert(oinfo.last_epoch_started == info.last_epoch_started);
+ assert(info.history.last_epoch_started == oinfo.last_epoch_started);
+ assert(oinfo.history.last_epoch_started == oinfo.last_epoch_started);
+
// Handle changes to purged_snaps ONLY IF we have caught up
if (last_complete_ondisk.epoch >= info.history.last_epoch_started) {
interval_set<snapid_t> p;
@@ -6451,7 +6462,7 @@ PG::RecoveryState::GetMissing::GetMissing(my_context ctx)
// We pull the log from the peer's last_epoch_started to ensure we
// get enough log to detect divergent updates.
- eversion_t since(pi.history.last_epoch_started, 0);
+ eversion_t since(pi.last_epoch_started, 0);
assert(pi.last_update >= pg->info.log_tail); // or else choose_acting() did a bad thing
if (pi.log_tail <= since) {
dout(10) << " requesting log+missing since " << since << " from osd." << *i << dendl;
diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc
index 2513d9d6fe4..388751e8e8b 100644
--- a/src/osd/ReplicatedPG.cc
+++ b/src/osd/ReplicatedPG.cc
@@ -7168,10 +7168,15 @@ boost::statechart::result ReplicatedPG::NotTrimming::react(const SnapTrim&)
dout(10) << "NotTrimming: obs_to_trim empty!" << dendl;
dout(10) << "purged_snaps now " << pg->info.purged_snaps << ", snap_trimq now "
<< pg->snap_trimq << dendl;
- ObjectStore::Transaction *t = new ObjectStore::Transaction;
- t->remove_collection(col_to_trim);
- int r = pg->osd->store->queue_transaction(NULL, t, new ObjectStore::C_DeleteTransaction(t));
- assert(r == 0);
+ if (pg->snap_collections.contains(snap_to_trim)) {
+ ObjectStore::Transaction *t = new ObjectStore::Transaction;
+ pg->snap_collections.erase(snap_to_trim);
+ t->remove_collection(col_to_trim);
+ pg->write_info(*t);
+ int r = pg->osd->store->queue_transaction(
+ NULL, t, new ObjectStore::C_DeleteTransaction(t));
+ assert(r == 0);
+ }
post_event(SnapTrim());
return discard_event();
} else {
@@ -7222,9 +7227,10 @@ boost::statechart::result ReplicatedPG::RepColTrim::react(const SnapTrim&)
t->collection_remove(col_to_trim, *i);
}
t->remove_collection(col_to_trim);
+ pg->snap_collections.erase(snap_to_trim);
+ pg->write_info(*t);
int r = pg->osd->store->queue_transaction(NULL, t, new ObjectStore::C_DeleteTransaction(t));
assert(r == 0);
- pg->snap_collections.erase(snap_to_trim);
return discard_event();
}
diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc
index 1ca5adc70be..4a1b3fcf2ef 100644
--- a/src/osd/osd_types.cc
+++ b/src/osd/osd_types.cc
@@ -1286,7 +1286,7 @@ void pg_history_t::generate_test_instances(list<pg_history_t*>& o)
void pg_info_t::encode(bufferlist &bl) const
{
- ENCODE_START(26, 26, bl);
+ ENCODE_START(27, 26, bl);
::encode(pgid, bl);
::encode(last_update, bl);
::encode(last_complete, bl);
@@ -1295,12 +1295,13 @@ void pg_info_t::encode(bufferlist &bl) const
::encode(stats, bl);
history.encode(bl);
::encode(purged_snaps, bl);
+ ::encode(last_epoch_started, bl);
ENCODE_FINISH(bl);
}
void pg_info_t::decode(bufferlist::iterator &bl)
{
- DECODE_START_LEGACY_COMPAT_LEN(26, 26, 26, bl);
+ DECODE_START_LEGACY_COMPAT_LEN(27, 26, 26, bl);
if (struct_v < 23) {
old_pg_t opgid;
::decode(opgid, bl);
@@ -1325,6 +1326,11 @@ void pg_info_t::decode(bufferlist::iterator &bl)
set<snapid_t> snap_trimq;
::decode(snap_trimq, bl);
}
+ if (struct_v < 27) {
+ last_epoch_started = history.last_epoch_started;
+ } else {
+ ::decode(last_epoch_started, bl);
+ }
DECODE_FINISH(bl);
}
@@ -1348,6 +1354,7 @@ void pg_info_t::dump(Formatter *f) const
f->dump_int("empty", is_empty());
f->dump_int("dne", dne());
f->dump_int("incomplete", is_incomplete());
+ f->dump_int("last_epoch_started", last_epoch_started);
}
void pg_info_t::generate_test_instances(list<pg_info_t*>& o)
diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h
index bb2ed253ce6..da2b2abf319 100644
--- a/src/osd/osd_types.h
+++ b/src/osd/osd_types.h
@@ -1039,6 +1039,7 @@ struct pg_info_t {
pg_t pgid;
eversion_t last_update; // last object version applied to store.
eversion_t last_complete; // last version pg was complete through.
+ epoch_t last_epoch_started;// last epoch at which this pg started on this osd
eversion_t log_tail; // oldest log entry.
@@ -1051,11 +1052,11 @@ struct pg_info_t {
pg_history_t history;
pg_info_t()
- : last_backfill(hobject_t::get_max())
+ : last_epoch_started(0), last_backfill(hobject_t::get_max())
{ }
pg_info_t(pg_t p)
: pgid(p),
- last_backfill(hobject_t::get_max())
+ last_epoch_started(0), last_backfill(hobject_t::get_max())
{ }
bool is_empty() const { return last_update.version == 0; }
@@ -1086,6 +1087,7 @@ inline ostream& operator<<(ostream& out, const pg_info_t& pgi)
out << " lb " << pgi.last_backfill;
}
//out << " c " << pgi.epoch_created;
+ out << " local-les=" << pgi.last_epoch_started;
out << " n=" << pgi.stats.stats.sum.num_objects;
out << " " << pgi.history
<< ")";
diff --git a/src/test/filestore/store_test.cc b/src/test/filestore/store_test.cc
index 55fde77f0fc..3a41fa10e4c 100644
--- a/src/test/filestore/store_test.cc
+++ b/src/test/filestore/store_test.cc
@@ -212,7 +212,7 @@ TEST_F(StoreTest, ManyObjectTest) {
ASSERT_EQ(r, 0);
listed.insert(objects.begin(), objects.end());
if (objects.size() < 50) {
- ASSERT_TRUE(next.max);
+ ASSERT_TRUE(next.is_max());
break;
}
objects.clear();
@@ -385,7 +385,7 @@ public:
ASSERT_TRUE(sorted(objects));
objects_set.insert(objects.begin(), objects.end());
objects.clear();
- if (next.max) break;
+ if (next.is_max()) break;
current = next;
}
ASSERT_EQ(objects_set.size(), available_objects.size());
@@ -529,7 +529,7 @@ TEST_F(StoreTest, HashCollisionTest) {
listed.insert(*i);
}
if (objects.size() < 50) {
- ASSERT_TRUE(next.max);
+ ASSERT_TRUE(next.is_max());
break;
}
objects.clear();