summaryrefslogtreecommitdiff
path: root/src/osd/PG.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/osd/PG.h')
-rw-r--r--src/osd/PG.h242
1 files changed, 4 insertions, 238 deletions
diff --git a/src/osd/PG.h b/src/osd/PG.h
index 8d8ad5c4c45..134f5ec470f 100644
--- a/src/osd/PG.h
+++ b/src/osd/PG.h
@@ -36,6 +36,7 @@
#include "include/atomic.h"
#include "SnapMapper.h"
+#include "PGLog.h"
#include "OpRequest.h"
#include "OSDMap.h"
#include "os/ObjectStore.h"
@@ -155,212 +156,8 @@ struct PGPool {
class PG {
public:
- /* Exceptions */
- class read_log_error : public buffer::error {
- public:
- explicit read_log_error(const char *what) {
- snprintf(buf, sizeof(buf), "read_log_error: %s", what);
- }
- const char *what() const throw () {
- return buf;
- }
- private:
- char buf[512];
- };
-
std::string gen_prefix() const;
-
- /**
- * IndexLog - adds in-memory index of the log, by oid.
- * plus some methods to manipulate it all.
- */
- struct IndexedLog : public pg_log_t {
- hash_map<hobject_t,pg_log_entry_t*> objects; // ptrs into log. be careful!
- hash_map<osd_reqid_t,pg_log_entry_t*> caller_ops;
-
- // recovery pointers
- list<pg_log_entry_t>::iterator complete_to; // not inclusive of referenced item
- version_t last_requested; // last object requested by primary
-
- /****/
- IndexedLog() : last_requested(0) {}
-
- void claim_log(const pg_log_t& o) {
- log = o.log;
- head = o.head;
- tail = o.tail;
- index();
- }
-
- void split_into(
- pg_t child_pgid,
- unsigned split_bits,
- IndexedLog *olog);
-
- void zero() {
- unindex();
- pg_log_t::clear();
- reset_recovery_pointers();
- }
- void reset_recovery_pointers() {
- complete_to = log.end();
- last_requested = 0;
- }
-
- bool logged_object(const hobject_t& oid) const {
- return objects.count(oid);
- }
- bool logged_req(const osd_reqid_t &r) const {
- return caller_ops.count(r);
- }
- eversion_t get_request_version(const osd_reqid_t &r) const {
- hash_map<osd_reqid_t,pg_log_entry_t*>::const_iterator p = caller_ops.find(r);
- if (p == caller_ops.end())
- return eversion_t();
- return p->second->version;
- }
-
- void index() {
- objects.clear();
- caller_ops.clear();
- for (list<pg_log_entry_t>::iterator i = log.begin();
- i != log.end();
- ++i) {
- objects[i->soid] = &(*i);
- if (i->reqid_is_indexed()) {
- //assert(caller_ops.count(i->reqid) == 0); // divergent merge_log indexes new before unindexing old
- caller_ops[i->reqid] = &(*i);
- }
- }
- }
-
- void index(pg_log_entry_t& e) {
- if (objects.count(e.soid) == 0 ||
- objects[e.soid]->version < e.version)
- objects[e.soid] = &e;
- if (e.reqid_is_indexed()) {
- //assert(caller_ops.count(i->reqid) == 0); // divergent merge_log indexes new before unindexing old
- caller_ops[e.reqid] = &e;
- }
- }
- void unindex() {
- objects.clear();
- caller_ops.clear();
- }
- void unindex(pg_log_entry_t& e) {
- // NOTE: this only works if we remove from the _tail_ of the log!
- if (objects.count(e.soid) && objects[e.soid]->version == e.version)
- objects.erase(e.soid);
- if (e.reqid_is_indexed() &&
- caller_ops.count(e.reqid) && // divergent merge_log indexes new before unindexing old
- caller_ops[e.reqid] == &e)
- caller_ops.erase(e.reqid);
- }
-
- // actors
- void add(pg_log_entry_t& e) {
- // add to log
- log.push_back(e);
- assert(e.version > head);
- assert(head.version == 0 || e.version.version > head.version);
- head = e.version;
-
- // to our index
- objects[e.soid] = &(log.back());
- caller_ops[e.reqid] = &(log.back());
- }
-
- void trim(ObjectStore::Transaction &t, hobject_t& oid, eversion_t s);
-
- ostream& print(ostream& out) const;
- };
-
-
- /**
- * OndiskLog - some info about how we store the log on disk.
- */
- class OndiskLog {
- public:
- // ok
- uint64_t tail; // first byte of log.
- uint64_t head; // byte following end of log.
- uint64_t zero_to; // first non-zeroed byte of log.
- bool has_checksums;
-
- /**
- * We reconstruct the missing set by comparing the recorded log against
- * the objects in the pg collection. Unfortunately, it's possible to
- * have an object in the missing set which is not in the log due to
- * a divergent operation with a prior_version pointing before the
- * pg log tail. To deal with this, we store alongside the log a mapping
- * of divergent priors to be checked along with the log during read_state.
- */
- map<eversion_t, hobject_t> divergent_priors;
- void add_divergent_prior(eversion_t version, hobject_t obj) {
- divergent_priors.insert(make_pair(version, obj));
- }
-
- OndiskLog() : tail(0), head(0), zero_to(0),
- has_checksums(true) {}
-
- uint64_t length() { return head - tail; }
- bool trim_to(eversion_t v, ObjectStore::Transaction& t);
-
- void zero() {
- tail = 0;
- head = 0;
- zero_to = 0;
- }
-
- void encode(bufferlist& bl) const {
- ENCODE_START(5, 3, bl);
- ::encode(tail, bl);
- ::encode(head, bl);
- ::encode(zero_to, bl);
- ::encode(divergent_priors, bl);
- ENCODE_FINISH(bl);
- }
- void decode(bufferlist::iterator& bl) {
- DECODE_START_LEGACY_COMPAT_LEN(3, 3, 3, bl);
- has_checksums = (struct_v >= 2);
- ::decode(tail, bl);
- ::decode(head, bl);
- if (struct_v >= 4)
- ::decode(zero_to, bl);
- else
- zero_to = 0;
- if (struct_v >= 5)
- ::decode(divergent_priors, bl);
- DECODE_FINISH(bl);
- }
- void dump(Formatter *f) const {
- f->dump_unsigned("head", head);
- f->dump_unsigned("tail", tail);
- f->dump_unsigned("zero_to", zero_to);
- f->open_array_section("divergent_priors");
- for (map<eversion_t, hobject_t>::const_iterator p = divergent_priors.begin();
- p != divergent_priors.end();
- ++p) {
- f->open_object_section("prior");
- f->dump_stream("version") << p->first;
- f->dump_stream("object") << p->second;
- f->close_section();
- }
- f->close_section();
- }
- static void generate_test_instances(list<OndiskLog*>& o) {
- o.push_back(new OndiskLog);
- o.push_back(new OndiskLog);
- o.back()->tail = 2;
- o.back()->head = 3;
- o.back()->zero_to = 1;
- }
- };
- WRITE_CLASS_ENCODER(OndiskLog)
-
-
-
/*** PG ****/
protected:
OSDService *osd;
@@ -466,7 +263,7 @@ public:
const interval_set<snapid_t> &snapcolls);
const coll_t coll;
- IndexedLog log;
+ PGLog pg_log;
static string get_info_key(pg_t pgid) {
return stringify(pgid) + "_info";
}
@@ -478,8 +275,6 @@ public:
}
hobject_t log_oid;
hobject_t biginfo_oid;
- OndiskLog ondisklog;
- pg_missing_t missing;
map<hobject_t, set<int> > missing_loc;
set<int> missing_loc_sources; // superset of missing_loc locations
@@ -784,16 +579,6 @@ public:
bool proc_replica_info(int from, const pg_info_t &info);
void remove_snap_mapped_object(
ObjectStore::Transaction& t, const hobject_t& soid);
- bool merge_old_entry(ObjectStore::Transaction& t, pg_log_entry_t& oe);
-
- /**
- * Merges authoratative log/info into current log/info/store
- *
- * @param [in,out] t used to delete obsolete objects
- * @param [in,out] oinfo recieved authoritative info
- * @param [in,out] olog recieved authoritative log
- * @param [in] from peer which sent the information
- */
void merge_log(ObjectStore::Transaction& t, pg_info_t &oinfo, pg_log_t &olog, int from);
void rewind_divergent_log(ObjectStore::Transaction& t, eversion_t newhead);
bool search_for_missing(const pg_info_t &oinfo, const pg_missing_t *omissing,
@@ -822,10 +607,10 @@ public:
void proc_primary_info(ObjectStore::Transaction &t, const pg_info_t &info);
bool have_unfound() const {
- return missing.num_missing() > missing_loc.size();
+ return pg_log.get_missing().num_missing() > missing_loc.size();
}
int get_num_unfound() const {
- return missing.num_missing() - missing_loc.size();
+ return pg_log.get_missing().num_missing() - missing_loc.size();
}
virtual void clean_up_local(ObjectStore::Transaction& t) = 0;
@@ -1878,35 +1663,18 @@ private:
void write_log(ObjectStore::Transaction& t);
public:
- static void clear_info_log(
- pg_t pgid,
- const hobject_t &infos_oid,
- const hobject_t &log_oid,
- ObjectStore::Transaction *t);
-
static int _write_info(ObjectStore::Transaction& t, epoch_t epoch,
pg_info_t &info, coll_t coll,
map<epoch_t,pg_interval_t> &past_intervals,
interval_set<snapid_t> &snap_collections,
hobject_t &infos_oid,
__u8 info_struct_v, bool dirty_big_info, bool force_ver = false);
- static void _write_log(ObjectStore::Transaction& t, pg_log_t &log,
- const hobject_t &log_oid, map<eversion_t, hobject_t> &divergent_priors);
void write_if_dirty(ObjectStore::Transaction& t);
void add_log_entry(pg_log_entry_t& e, bufferlist& log_bl);
void append_log(
vector<pg_log_entry_t>& logv, eversion_t trim_to, ObjectStore::Transaction &t);
-
- /// return true if the log should be rewritten
- static bool read_log(ObjectStore *store, coll_t coll, hobject_t log_oid,
- const pg_info_t &info, OndiskLog &ondisklog, IndexedLog &log,
- pg_missing_t &missing, ostringstream &oss, const PG *passedpg = NULL);
- static void read_log_old(ObjectStore *store, coll_t coll, hobject_t log_oid,
- const pg_info_t &info, OndiskLog &ondisklog, IndexedLog &log,
- pg_missing_t &missing, ostringstream &oss, const PG *passedpg = NULL);
bool check_log_for_corruption(ObjectStore *store);
- void trim(ObjectStore::Transaction& t, eversion_t v);
void trim_peers();
std::string get_corrupt_pg_log_name() const;
@@ -2027,8 +1795,6 @@ public:
virtual void on_shutdown() = 0;
};
-WRITE_CLASS_ENCODER(PG::OndiskLog)
-
ostream& operator<<(ostream& out, const PG& pg);
#endif