diff options
author | Sage Weil <sage@inktank.com> | 2013-01-04 20:48:12 -0800 |
---|---|---|
committer | Sage Weil <sage@inktank.com> | 2013-01-04 20:48:12 -0800 |
commit | 415294c0f9f5fad7abe2f6cb8d325669c5acadea (patch) | |
tree | 7196f978cc7b3c610b38d7131be57588771b8ce0 | |
parent | 3a9408742a8a6cbc870cba543a208285f1a6cec1 (diff) | |
parent | 988a52173522e9a410ba975a4e8b7c25c7801123 (diff) | |
download | ceph-415294c0f9f5fad7abe2f6cb8d325669c5acadea.tar.gz |
Merge branch 'next'
-rw-r--r-- | src/include/rados.h | 10 | ||||
-rw-r--r-- | src/os/FileStore.cc | 21 | ||||
-rw-r--r-- | src/osd/OSD.cc | 58 | ||||
-rw-r--r-- | src/osd/OSD.h | 2 | ||||
-rw-r--r-- | src/osd/OSDMap.cc | 2 | ||||
-rw-r--r-- | src/osd/ReplicatedPG.cc | 12 | ||||
-rw-r--r-- | src/osdc/Objecter.cc | 2 | ||||
-rw-r--r-- | src/test/cli/osdmaptool/clobber.t | 4 | ||||
-rw-r--r-- | src/test/cli/osdmaptool/create-print.t | 2 |
9 files changed, 72 insertions, 41 deletions
diff --git a/src/include/rados.h b/src/include/rados.h index b1eabdf34de..4f7d7174c47 100644 --- a/src/include/rados.h +++ b/src/include/rados.h @@ -141,6 +141,10 @@ extern const char *ceph_osd_state_name(int s); /* * osd ops + * + * WARNING: do not use these op codes directly. Use the helpers + * defined below instead. In certain cases, op code behavior was + * redefined, resulting in special-cases in the helpers. */ #define CEPH_OSD_OP_MODE 0xf000 #define CEPH_OSD_OP_MODE_RD 0x1000 @@ -244,7 +248,8 @@ enum { CEPH_OSD_OP_DNLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 6, /** exec **/ - CEPH_OSD_OP_CALL = CEPH_OSD_OP_TYPE_EXEC | 1, + /* note: the RD bit here is wrong; see special-case below in helper */ + CEPH_OSD_OP_CALL = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_EXEC | 1, /** pg **/ CEPH_OSD_OP_PGLS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_PG | 1, @@ -282,7 +287,8 @@ static inline int ceph_osd_op_mode_subop(int op) } static inline int ceph_osd_op_mode_read(int op) { - return op & CEPH_OSD_OP_MODE_RD; + return (op & CEPH_OSD_OP_MODE_RD) && + op != CEPH_OSD_OP_CALL; } static inline int ceph_osd_op_mode_modify(int op) { diff --git a/src/os/FileStore.cc b/src/os/FileStore.cc index 99cea7f22da..533bf048978 100644 --- a/src/os/FileStore.cc +++ b/src/os/FileStore.cc @@ -3353,11 +3353,6 @@ void FileStore::sync_entry() sync_epoch++; dout(15) << "sync_entry committing " << cp << " sync_epoch " << sync_epoch << dendl; - int err = write_op_seq(op_fd, cp); - if (err < 0) { - derr << "Error during write_op_seq: " << cpp_strerror(err) << dendl; - assert(0); - } stringstream errstream; if (g_conf->filestore_debug_omap_check && !object_map->check(errstream)) { derr << errstream.str() << dendl; @@ -3365,6 +3360,11 @@ void FileStore::sync_entry() } if (btrfs_stable_commits) { + int err = write_op_seq(op_fd, cp); + if (err < 0) { + derr << "Error during write_op_seq: " << cpp_strerror(err) << dendl; + assert(0 == "error during write_op_seq"); + } if (btrfs_snap_create_v2) { // be smart! @@ -3446,6 +3446,17 @@ void FileStore::sync_entry() dout(15) << "sync_entry doing a full sync (syncfs(2) if possible)" << dendl; sync_filesystem(basedir_fd); } + + int err = write_op_seq(op_fd, cp); + if (err < 0) { + derr << "Error during write_op_seq: " << cpp_strerror(err) << dendl; + assert(0 == "error during write_op_seq"); + } + err = ::fsync(op_fd); + if (err < 0) { + derr << "Error during fsync of op_seq: " << cpp_strerror(err) << dendl; + assert(0 == "error during fsync of op_seq"); + } } utime_t done = ceph_clock_now(g_ceph_context); diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 5c96c6df80e..d0236e06419 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -3956,8 +3956,14 @@ void OSD::handle_osd_map(MOSDMap *m) check_osdmap_features(); // yay! - if (is_active()) + consume_map(); + + if (!is_active()) { + dout(10) << " not yet active; waiting for peering wq to drain" << dendl; + peering_wq.drain(); + } else { activate_map(); + } if (m->newest_map && m->newest_map > last) { dout(10) << " msg say newest map is " << m->newest_map << ", requesting more" << dendl; @@ -4048,7 +4054,8 @@ void OSD::advance_pg( lastmap = nextmap; } - pg->handle_activate_map(rctx); + if (!is_booting()) + pg->handle_activate_map(rctx); } /** @@ -4135,20 +4142,12 @@ void OSD::advance_map(ObjectStore::Transaction& t, C_Contexts *tfin) } } -void OSD::activate_map() +void OSD::consume_map() { assert(osd_lock.is_locked()); - - dout(7) << "activate_map version " << osdmap->get_epoch() << dendl; - - map< int, vector<pair<pg_notify_t,pg_interval_map_t> > > notify_list; // primary -> list - map< int, map<pg_t,pg_query_t> > query_map; // peer -> PG -> get_summary_since - map<int,MOSDPGInfo*> info_map; // peer -> message + dout(7) << "consume_map version " << osdmap->get_epoch() << dendl; int num_pg_primary = 0, num_pg_replica = 0, num_pg_stray = 0; - - epoch_t oldest_last_clean = osdmap->get_epoch(); - list<PG*> to_remove; service.expand_pg_num(service.get_osdmap(), @@ -4167,9 +4166,6 @@ void OSD::activate_map() else num_pg_stray++; - if (pg->is_primary() && pg->info.history.last_epoch_clean < oldest_last_clean) - oldest_last_clean = pg->info.history.last_epoch_clean; - set<pg_t> split_pgs; if (!osdmap->have_pg_pool(pg->info.pgid.pool())) { //pool is deleted! @@ -4207,12 +4203,18 @@ void OSD::activate_map() pg->queue_null(osdmap->get_epoch(), osdmap->get_epoch()); pg->unlock(); } - logger->set(l_osd_pg, pg_map.size()); logger->set(l_osd_pg_primary, num_pg_primary); logger->set(l_osd_pg_replica, num_pg_replica); logger->set(l_osd_pg_stray, num_pg_stray); +} + +void OSD::activate_map() +{ + assert(osd_lock.is_locked()); + + dout(7) << "activate_map version " << osdmap->get_epoch() << dendl; wake_all_pg_waiters(); // the pg mapping may have shifted maybe_update_heartbeat_peers(); @@ -4888,6 +4890,23 @@ void OSD::dispatch_context_transaction(PG::RecoveryCtx &ctx, PG *pg) } } +bool OSD::compat_must_dispatch_immediately(PG *pg) +{ + assert(pg->is_locked()); + for (vector<int>::iterator i = pg->acting.begin(); + i != pg->acting.end(); + ++i) { + if (*i == whoami) + continue; + ConnectionRef conn = + service.get_con_osd_cluster(*i, pg->get_osdmap()->get_epoch()); + if (conn && !(conn->features & CEPH_FEATURE_INDEP_PG_MAP)) { + return true; + } + } + return false; +} + void OSD::dispatch_context(PG::RecoveryCtx &ctx, PG *pg, OSDMapRef curmap) { do_notifies(*ctx.notify_list, curmap); @@ -6158,7 +6177,12 @@ void OSD::process_peering_events(const list<PG*> &pgs) rctx.on_applied->add(new C_CompleteSplits(this, split_pgs)); split_pgs.clear(); } - dispatch_context_transaction(rctx, pg); + if (compat_must_dispatch_immediately(pg)) { + dispatch_context(rctx, pg, curmap); + rctx = create_context(); + } else { + dispatch_context_transaction(rctx, pg); + } pg->unlock(); } if (need_up_thru) diff --git a/src/osd/OSD.h b/src/osd/OSD.h index 05b1978b429..8bab8a99059 100644 --- a/src/osd/OSD.h +++ b/src/osd/OSD.h @@ -772,6 +772,7 @@ private: epoch_t advance_to, PG *pg, PG::RecoveryCtx *rctx, set<boost::intrusive_ptr<PG> > *split_pgs); void advance_map(ObjectStore::Transaction& t, C_Contexts *tfin); + void consume_map(); void activate_map(); // osd map cache (past osd maps) @@ -968,6 +969,7 @@ protected: // -- generic pg peering -- PG::RecoveryCtx create_context(); + bool compat_must_dispatch_immediately(PG *pg); void dispatch_context(PG::RecoveryCtx &ctx, PG *pg, OSDMapRef curmap); void dispatch_context_transaction(PG::RecoveryCtx &ctx, PG *pg); void do_notifies(map< int,vector<pair<pg_notify_t, pg_interval_map_t> > >& notify_list, diff --git a/src/osd/OSDMap.cc b/src/osd/OSDMap.cc index 03ecad78dcc..63a1fae809b 100644 --- a/src/osd/OSDMap.cc +++ b/src/osd/OSDMap.cc @@ -1382,7 +1382,7 @@ void OSDMap::print(ostream& out) const out << "epoch " << get_epoch() << "\n" << "fsid " << get_fsid() << "\n" << "created " << get_created() << "\n" - << "modifed " << get_modified() << "\n"; + << "modified " << get_modified() << "\n"; out << "flags " << get_flag_string() << "\n"; if (get_cluster_snapshot().length()) diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index 59e055d5829..cbae003827c 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -5073,18 +5073,6 @@ void ReplicatedPG::push_to_replica( if (soid.snap && soid.snap < CEPH_NOSNAP) { hobject_t head = soid; head.snap = CEPH_NOSNAP; - if (peer_missing[peer].is_missing(head) && - peer_missing[peer].have_old(head) == oi.prior_version) { - dout(10) << "push_to_replica osd." << peer << " has correct old " << head - << " v" << oi.prior_version - << ", pushing " << soid << " attrs as a clone op" << dendl; - interval_set<uint64_t> data_subset; - map<hobject_t, interval_set<uint64_t> > clone_subsets; - if (size) - clone_subsets[head].insert(0, size); - push_start(prio, obc, soid, peer, oi.version, data_subset, clone_subsets); - return; - } // try to base push off of clones that succeed/preceed poid // we need the head (and current SnapSet) locally to do that. diff --git a/src/osdc/Objecter.cc b/src/osdc/Objecter.cc index 6abada8f5d8..04a74b87b66 100644 --- a/src/osdc/Objecter.cc +++ b/src/osdc/Objecter.cc @@ -1287,7 +1287,7 @@ int Objecter::calc_op_budget(Op *op) ++i) { if (i->op.op & CEPH_OSD_OP_MODE_WR) { op_budget += i->indata.length(); - } else if (i->op.op & CEPH_OSD_OP_MODE_RD) { + } else if (ceph_osd_op_mode_read(i->op.op)) { if (ceph_osd_op_type_data(i->op.op)) { if ((int64_t)i->op.extent.length > 0) op_budget += (int64_t)i->op.extent.length; diff --git a/src/test/cli/osdmaptool/clobber.t b/src/test/cli/osdmaptool/clobber.t index 85cdd179d72..46194db9ffb 100644 --- a/src/test/cli/osdmaptool/clobber.t +++ b/src/test/cli/osdmaptool/clobber.t @@ -16,7 +16,7 @@ epoch 1 fsid [0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12} (re) created \d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d+ (re) - modifed \d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d+ (re) + modified \d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d+ (re) flags pool 0 'data' rep size 2 crush_ruleset 0 object_hash rjenkins pg_num 192 pgp_num 192 last_change 0 owner 0 crash_replay_interval 45 @@ -38,7 +38,7 @@ epoch 1 fsid [0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12} (re) created \d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d+ (re) - modifed \d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d+ (re) + modified \d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d+ (re) flags pool 0 'data' rep size 2 crush_ruleset 0 object_hash rjenkins pg_num 64 pgp_num 64 last_change 0 owner 0 crash_replay_interval 45 diff --git a/src/test/cli/osdmaptool/create-print.t b/src/test/cli/osdmaptool/create-print.t index 93cdee21196..a01d27d69fa 100644 --- a/src/test/cli/osdmaptool/create-print.t +++ b/src/test/cli/osdmaptool/create-print.t @@ -7,7 +7,7 @@ epoch 1 fsid [0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12} (re) created \d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d+ (re) - modifed \d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d+ (re) + modified \d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d+ (re) flags pool 0 'data' rep size 2 crush_ruleset 0 object_hash rjenkins pg_num 192 pgp_num 192 last_change 0 owner 0 crash_replay_interval 45 |