summary refs log tree commit diff
diff options
context:
space:
mode:
author Sage Weil <sage@inktank.com> 2013-01-04 20:48:12 -0800
committer Sage Weil <sage@inktank.com> 2013-01-04 20:48:12 -0800
commit 415294c0f9f5fad7abe2f6cb8d325669c5acadea (patch)
tree 7196f978cc7b3c610b38d7131be57588771b8ce0
parent 3a9408742a8a6cbc870cba543a208285f1a6cec1 (diff)
parent 988a52173522e9a410ba975a4e8b7c25c7801123 (diff)
download ceph-415294c0f9f5fad7abe2f6cb8d325669c5acadea.tar.gz
Merge branch 'next'
-rw-r--r-- src/include/rados.h 10
-rw-r--r-- src/os/FileStore.cc 21
-rw-r--r-- src/osd/OSD.cc 58
-rw-r--r-- src/osd/OSD.h 2
-rw-r--r-- src/osd/OSDMap.cc 2
-rw-r--r-- src/osd/ReplicatedPG.cc 12
-rw-r--r-- src/osdc/Objecter.cc 2
-rw-r--r-- src/test/cli/osdmaptool/clobber.t 4
-rw-r--r-- src/test/cli/osdmaptool/create-print.t 2
9 files changed, 72 insertions, 41 deletions
diff --git a/src/include/rados.h b/src/include/rados.h
index b1eabdf34de..4f7d7174c47 100644
--- a/src/include/rados.h
+++ b/src/include/rados.h
@@ -141,6 +141,10 @@ extern const char *ceph_osd_state_name(int s);
/*
* osd ops
+ *
+ * WARNING: do not use these op codes directly. Use the helpers
+ * defined below instead. In certain cases, op code behavior was
+ * redefined, resulting in special-cases in the helpers.
*/
#define CEPH_OSD_OP_MODE 0xf000
#define CEPH_OSD_OP_MODE_RD 0x1000
@@ -244,7 +248,8 @@ enum {
CEPH_OSD_OP_DNLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 6,
/** exec **/
- CEPH_OSD_OP_CALL = CEPH_OSD_OP_TYPE_EXEC | 1,
+ /* note: the RD bit here is wrong; see special-case below in helper */
+ CEPH_OSD_OP_CALL = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_EXEC | 1,
/** pg **/
CEPH_OSD_OP_PGLS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_PG | 1,
@@ -282,7 +287,8 @@ static inline int ceph_osd_op_mode_subop(int op)
}
static inline int ceph_osd_op_mode_read(int op)
{
- return op & CEPH_OSD_OP_MODE_RD;
+ return (op & CEPH_OSD_OP_MODE_RD) &&
+ op != CEPH_OSD_OP_CALL;
}
static inline int ceph_osd_op_mode_modify(int op)
{
diff --git a/src/os/FileStore.cc b/src/os/FileStore.cc
index 99cea7f22da..533bf048978 100644
--- a/src/os/FileStore.cc
+++ b/src/os/FileStore.cc
@@ -3353,11 +3353,6 @@ void FileStore::sync_entry()
sync_epoch++;
dout(15) << "sync_entry committing " << cp << " sync_epoch " << sync_epoch << dendl;
- int err = write_op_seq(op_fd, cp);
- if (err < 0) {
- derr << "Error during write_op_seq: " << cpp_strerror(err) << dendl;
- assert(0);
- }
stringstream errstream;
if (g_conf->filestore_debug_omap_check && !object_map->check(errstream)) {
derr << errstream.str() << dendl;
@@ -3365,6 +3360,11 @@ void FileStore::sync_entry()
}
if (btrfs_stable_commits) {
+ int err = write_op_seq(op_fd, cp);
+ if (err < 0) {
+ derr << "Error during write_op_seq: " << cpp_strerror(err) << dendl;
+ assert(0 == "error during write_op_seq");
+ }
if (btrfs_snap_create_v2) {
// be smart!
@@ -3446,6 +3446,17 @@ void FileStore::sync_entry()
dout(15) << "sync_entry doing a full sync (syncfs(2) if possible)" << dendl;
sync_filesystem(basedir_fd);
}
+
+ int err = write_op_seq(op_fd, cp);
+ if (err < 0) {
+ derr << "Error during write_op_seq: " << cpp_strerror(err) << dendl;
+ assert(0 == "error during write_op_seq");
+ }
+ err = ::fsync(op_fd);
+ if (err < 0) {
+ derr << "Error during fsync of op_seq: " << cpp_strerror(err) << dendl;
+ assert(0 == "error during fsync of op_seq");
+ }
}
utime_t done = ceph_clock_now(g_ceph_context);
diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc
index 5c96c6df80e..d0236e06419 100644
--- a/src/osd/OSD.cc
+++ b/src/osd/OSD.cc
@@ -3956,8 +3956,14 @@ void OSD::handle_osd_map(MOSDMap *m)
check_osdmap_features();
// yay!
- if (is_active())
+ consume_map();
+
+ if (!is_active()) {
+ dout(10) << " not yet active; waiting for peering wq to drain" << dendl;
+ peering_wq.drain();
+ } else {
activate_map();
+ }
if (m->newest_map && m->newest_map > last) {
dout(10) << " msg say newest map is " << m->newest_map << ", requesting more" << dendl;
@@ -4048,7 +4054,8 @@ void OSD::advance_pg(
lastmap = nextmap;
}
- pg->handle_activate_map(rctx);
+ if (!is_booting())
+ pg->handle_activate_map(rctx);
}
/**
@@ -4135,20 +4142,12 @@ void OSD::advance_map(ObjectStore::Transaction& t, C_Contexts *tfin)
}
}
-void OSD::activate_map()
+void OSD::consume_map()
{
assert(osd_lock.is_locked());
-
- dout(7) << "activate_map version " << osdmap->get_epoch() << dendl;
-
- map< int, vector<pair<pg_notify_t,pg_interval_map_t> > > notify_list; // primary -> list
- map< int, map<pg_t,pg_query_t> > query_map; // peer -> PG -> get_summary_since
- map<int,MOSDPGInfo*> info_map; // peer -> message
+ dout(7) << "consume_map version " << osdmap->get_epoch() << dendl;
int num_pg_primary = 0, num_pg_replica = 0, num_pg_stray = 0;
-
- epoch_t oldest_last_clean = osdmap->get_epoch();
-
list<PG*> to_remove;
service.expand_pg_num(service.get_osdmap(),
@@ -4167,9 +4166,6 @@ void OSD::activate_map()
else
num_pg_stray++;
- if (pg->is_primary() && pg->info.history.last_epoch_clean < oldest_last_clean)
- oldest_last_clean = pg->info.history.last_epoch_clean;
-
set<pg_t> split_pgs;
if (!osdmap->have_pg_pool(pg->info.pgid.pool())) {
//pool is deleted!
@@ -4207,12 +4203,18 @@ void OSD::activate_map()
pg->queue_null(osdmap->get_epoch(), osdmap->get_epoch());
pg->unlock();
}
-
logger->set(l_osd_pg, pg_map.size());
logger->set(l_osd_pg_primary, num_pg_primary);
logger->set(l_osd_pg_replica, num_pg_replica);
logger->set(l_osd_pg_stray, num_pg_stray);
+}
+
+void OSD::activate_map()
+{
+ assert(osd_lock.is_locked());
+
+ dout(7) << "activate_map version " << osdmap->get_epoch() << dendl;
wake_all_pg_waiters(); // the pg mapping may have shifted
maybe_update_heartbeat_peers();
@@ -4888,6 +4890,23 @@ void OSD::dispatch_context_transaction(PG::RecoveryCtx &ctx, PG *pg)
}
}
+bool OSD::compat_must_dispatch_immediately(PG *pg)
+{
+ assert(pg->is_locked());
+ for (vector<int>::iterator i = pg->acting.begin();
+ i != pg->acting.end();
+ ++i) {
+ if (*i == whoami)
+ continue;
+ ConnectionRef conn =
+ service.get_con_osd_cluster(*i, pg->get_osdmap()->get_epoch());
+ if (conn && !(conn->features & CEPH_FEATURE_INDEP_PG_MAP)) {
+ return true;
+ }
+ }
+ return false;
+}
+
void OSD::dispatch_context(PG::RecoveryCtx &ctx, PG *pg, OSDMapRef curmap)
{
do_notifies(*ctx.notify_list, curmap);
@@ -6158,7 +6177,12 @@ void OSD::process_peering_events(const list<PG*> &pgs)
rctx.on_applied->add(new C_CompleteSplits(this, split_pgs));
split_pgs.clear();
}
- dispatch_context_transaction(rctx, pg);
+ if (compat_must_dispatch_immediately(pg)) {
+ dispatch_context(rctx, pg, curmap);
+ rctx = create_context();
+ } else {
+ dispatch_context_transaction(rctx, pg);
+ }
pg->unlock();
}
if (need_up_thru)
diff --git a/src/osd/OSD.h b/src/osd/OSD.h
index 05b1978b429..8bab8a99059 100644
--- a/src/osd/OSD.h
+++ b/src/osd/OSD.h
@@ -772,6 +772,7 @@ private:
epoch_t advance_to, PG *pg, PG::RecoveryCtx *rctx,
set<boost::intrusive_ptr<PG> > *split_pgs);
void advance_map(ObjectStore::Transaction& t, C_Contexts *tfin);
+ void consume_map();
void activate_map();
// osd map cache (past osd maps)
@@ -968,6 +969,7 @@ protected:
// -- generic pg peering --
PG::RecoveryCtx create_context();
+ bool compat_must_dispatch_immediately(PG *pg);
void dispatch_context(PG::RecoveryCtx &ctx, PG *pg, OSDMapRef curmap);
void dispatch_context_transaction(PG::RecoveryCtx &ctx, PG *pg);
void do_notifies(map< int,vector<pair<pg_notify_t, pg_interval_map_t> > >& notify_list,
diff --git a/src/osd/OSDMap.cc b/src/osd/OSDMap.cc
index 03ecad78dcc..63a1fae809b 100644
--- a/src/osd/OSDMap.cc
+++ b/src/osd/OSDMap.cc
@@ -1382,7 +1382,7 @@ void OSDMap::print(ostream& out) const
out << "epoch " << get_epoch() << "\n"
<< "fsid " << get_fsid() << "\n"
<< "created " << get_created() << "\n"
- << "modifed " << get_modified() << "\n";
+ << "modified " << get_modified() << "\n";
out << "flags " << get_flag_string() << "\n";
if (get_cluster_snapshot().length())
diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc
index 59e055d5829..cbae003827c 100644
--- a/src/osd/ReplicatedPG.cc
+++ b/src/osd/ReplicatedPG.cc
@@ -5073,18 +5073,6 @@ void ReplicatedPG::push_to_replica(
if (soid.snap && soid.snap < CEPH_NOSNAP) {
hobject_t head = soid;
head.snap = CEPH_NOSNAP;
- if (peer_missing[peer].is_missing(head) &&
- peer_missing[peer].have_old(head) == oi.prior_version) {
- dout(10) << "push_to_replica osd." << peer << " has correct old " << head
- << " v" << oi.prior_version
- << ", pushing " << soid << " attrs as a clone op" << dendl;
- interval_set<uint64_t> data_subset;
- map<hobject_t, interval_set<uint64_t> > clone_subsets;
- if (size)
- clone_subsets[head].insert(0, size);
- push_start(prio, obc, soid, peer, oi.version, data_subset, clone_subsets);
- return;
- }
// try to base push off of clones that succeed/preceed poid
// we need the head (and current SnapSet) locally to do that.
diff --git a/src/osdc/Objecter.cc b/src/osdc/Objecter.cc
index 6abada8f5d8..04a74b87b66 100644
--- a/src/osdc/Objecter.cc
+++ b/src/osdc/Objecter.cc
@@ -1287,7 +1287,7 @@ int Objecter::calc_op_budget(Op *op)
++i) {
if (i->op.op & CEPH_OSD_OP_MODE_WR) {
op_budget += i->indata.length();
- } else if (i->op.op & CEPH_OSD_OP_MODE_RD) {
+ } else if (ceph_osd_op_mode_read(i->op.op)) {
if (ceph_osd_op_type_data(i->op.op)) {
if ((int64_t)i->op.extent.length > 0)
op_budget += (int64_t)i->op.extent.length;
diff --git a/src/test/cli/osdmaptool/clobber.t b/src/test/cli/osdmaptool/clobber.t
index 85cdd179d72..46194db9ffb 100644
--- a/src/test/cli/osdmaptool/clobber.t
+++ b/src/test/cli/osdmaptool/clobber.t
@@ -16,7 +16,7 @@
epoch 1
fsid [0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12} (re)
created \d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d+ (re)
- modifed \d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d+ (re)
+ modified \d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d+ (re)
flags
pool 0 'data' rep size 2 crush_ruleset 0 object_hash rjenkins pg_num 192 pgp_num 192 last_change 0 owner 0 crash_replay_interval 45
@@ -38,7 +38,7 @@
epoch 1
fsid [0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12} (re)
created \d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d+ (re)
- modifed \d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d+ (re)
+ modified \d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d+ (re)
flags
pool 0 'data' rep size 2 crush_ruleset 0 object_hash rjenkins pg_num 64 pgp_num 64 last_change 0 owner 0 crash_replay_interval 45
diff --git a/src/test/cli/osdmaptool/create-print.t b/src/test/cli/osdmaptool/create-print.t
index 93cdee21196..a01d27d69fa 100644
--- a/src/test/cli/osdmaptool/create-print.t
+++ b/src/test/cli/osdmaptool/create-print.t
@@ -7,7 +7,7 @@
epoch 1
fsid [0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12} (re)
created \d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d+ (re)
- modifed \d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d+ (re)
+ modified \d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d+ (re)
flags
pool 0 'data' rep size 2 crush_ruleset 0 object_hash rjenkins pg_num 192 pgp_num 192 last_change 0 owner 0 crash_replay_interval 45