diff options
author | Sage Weil <sage@inktank.com> | 2013-05-28 22:10:21 -0700 |
---|---|---|
committer | Sage Weil <sage@inktank.com> | 2013-05-28 22:10:21 -0700 |
commit | 6afc22a158fea9132479cf7407c13da1504eceb5 (patch) | |
tree | 0a89132a8d10c3d5d1b8ddae2ae77126534a17e1 | |
parent | b6be785775442af1999b2543bd07a0d28391dbc5 (diff) | |
parent | 054e96cf79e960894ef7e33a4d13635d3ad2a1b9 (diff) | |
download | ceph-6afc22a158fea9132479cf7407c13da1504eceb5.tar.gz |
Merge remote-tracking branch 'gh/last'
-rw-r--r-- | configure.ac | 2 | ||||
-rw-r--r-- | debian/changelog | 6 | ||||
-rw-r--r-- | src/cls/rgw/cls_rgw.cc | 19 | ||||
-rw-r--r-- | src/common/config_opts.h | 1 | ||||
-rw-r--r-- | src/mon/MonmapMonitor.cc | 2 | ||||
-rw-r--r-- | src/mon/Paxos.cc | 26 | ||||
-rw-r--r-- | src/mon/Paxos.h | 5 | ||||
-rw-r--r-- | src/mon/PaxosService.h | 7 | ||||
-rw-r--r-- | src/os/HashIndex.cc | 15 | ||||
-rw-r--r-- | src/osd/OSD.h | 12 | ||||
-rw-r--r-- | src/osd/PG.cc | 60 | ||||
-rw-r--r-- | src/osd/PG.h | 26 |
12 files changed, 111 insertions, 70 deletions
diff --git a/configure.ac b/configure.ac index 8a427decd24..36b05b8f410 100644 --- a/configure.ac +++ b/configure.ac @@ -8,7 +8,7 @@ AC_PREREQ(2.59) # VERSION define is not used by the code. It gets a version string # from 'git describe'; see src/ceph_ver.[ch] -AC_INIT([ceph], [0.62], [ceph-devel@vger.kernel.org]) +AC_INIT([ceph], [0.63], [ceph-devel@vger.kernel.org]) # Create release string. Used with VERSION for RPMs. RPM_RELEASE=0 diff --git a/debian/changelog b/debian/changelog index 41460b200c6..93483e52b39 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +ceph (0.63-1) precise; urgency=low + + * New upstream release + + -- Gary Lowell <gary.lowell@inktank.com> Tue, 28 May 2013 13:57:53 -0700 + ceph (0.62) precise; urgency=low * New upstream release diff --git a/src/cls/rgw/cls_rgw.cc b/src/cls/rgw/cls_rgw.cc index 15498ef0aa6..cf81440f7fb 100644 --- a/src/cls/rgw/cls_rgw.cc +++ b/src/cls/rgw/cls_rgw.cc @@ -586,6 +586,13 @@ static void usage_record_prefix_by_time(uint64_t epoch, string& key) key = buf; } +static void usage_record_prefix_by_user(string& user, uint64_t epoch, string& key) +{ + char buf[user.size() + 32]; + snprintf(buf, sizeof(buf), "%s_%011llu_", user.c_str(), (long long unsigned)epoch); + key = buf; +} + static void usage_record_name_by_time(uint64_t epoch, string& user, string& bucket, string& key) { char buf[32 + user.size() + bucket.size()]; @@ -695,7 +702,7 @@ static int usage_iterate_range(cls_method_context_t hctx, uint64_t start, uint64 if (key_iter.empty()) { if (by_user) { - start_key = user; + usage_record_prefix_by_user(user, start, start_key); } else { usage_record_prefix_by_time(start, start_key); } @@ -704,6 +711,7 @@ static int usage_iterate_range(cls_method_context_t hctx, uint64_t start, uint64 } do { + CLS_LOG(20, "usage_iterate_range start_key=%s", start_key.c_str()); int ret = cls_cxx_map_get_vals(hctx, start_key, filter_prefix, NUM_KEYS, &keys); if (ret < 0) return ret; @@ -717,11 +725,15 @@ static int usage_iterate_range(cls_method_context_t hctx, uint64_t start, uint64 const string& key = iter->first; rgw_usage_log_entry e; - if (!by_user && key.compare(end_key) >= 0) + if (!by_user && key.compare(end_key) >= 0) { + CLS_LOG(20, "usage_iterate_range reached key=%s, done", key.c_str()); return 0; + } - if (by_user && key.compare(0, user_key.size(), user_key) != 0) + if (by_user && key.compare(0, user_key.size(), user_key) != 0) { + CLS_LOG(20, "usage_iterate_range reached key=%s, done", key.c_str()); return 0; + } ret = usage_record_decode(iter->second, e); if (ret < 0) @@ -741,6 +753,7 @@ static int usage_iterate_range(cls_method_context_t hctx, uint64_t start, uint64 i++; if (max_entries && (i > max_entries)) { + CLS_LOG(20, "usage_iterate_range reached max_entries (%d), done", max_entries); *truncated = true; key_iter = key; return 0; diff --git a/src/common/config_opts.h b/src/common/config_opts.h index 0cf9121a192..421b4f92283 100644 --- a/src/common/config_opts.h +++ b/src/common/config_opts.h @@ -425,6 +425,7 @@ OPTION(osd_scrub_min_interval, OPT_FLOAT, 60*60*24) // if load is low OPTION(osd_scrub_max_interval, OPT_FLOAT, 7*60*60*24) // regardless of load OPTION(osd_deep_scrub_interval, OPT_FLOAT, 60*60*24*7) // once a week OPTION(osd_deep_scrub_stride, OPT_INT, 524288) +OPTION(osd_scan_list_ping_tp_interval, OPT_U64, 100) OPTION(osd_auto_weight, OPT_BOOL, false) OPTION(osd_class_dir, OPT_STR, CEPH_LIBDIR "/rados-classes") // where rados plugins are stored OPTION(osd_check_for_log_corruption, OPT_BOOL, false) diff --git a/src/mon/MonmapMonitor.cc b/src/mon/MonmapMonitor.cc index badac7e0922..d7472797f15 100644 --- a/src/mon/MonmapMonitor.cc +++ b/src/mon/MonmapMonitor.cc @@ -111,7 +111,7 @@ void MonmapMonitor::update_from_paxos() } if (need_restart) { - paxos->prepare_bootstrap(); + mon->bootstrap(); } } diff --git a/src/mon/Paxos.cc b/src/mon/Paxos.cc index 71ef2ec3de0..3311d7bae93 100644 --- a/src/mon/Paxos.cc +++ b/src/mon/Paxos.cc @@ -37,13 +37,6 @@ static ostream& _prefix(std::ostream *_dout, Monitor *mon, const string& name, << ") "; } -void Paxos::prepare_bootstrap() -{ - dout(0) << __func__ << dendl; - - going_to_bootstrap = true; -} - MonitorDBStore *Paxos::get_store() { return mon->store; @@ -445,6 +438,8 @@ void Paxos::handle_last(MMonPaxos *last) dout(10) << "that's everyone. active!" << dendl; extend_lease(); + finish_proposal(); + finish_contexts(g_ceph_context, waiting_for_active); finish_contexts(g_ceph_context, waiting_for_readable); finish_contexts(g_ceph_context, waiting_for_writeable); @@ -834,12 +829,6 @@ void Paxos::finish_proposal() first_committed = get_store()->get(get_name(), "first_committed"); last_committed = get_store()->get(get_name(), "last_committed"); - if (proposals.empty() && going_to_bootstrap) { - dout(0) << __func__ << " no more proposals; bootstraping." << dendl; - mon->bootstrap(); - return; - } - if (should_trim()) { trim(); } @@ -1085,16 +1074,15 @@ void Paxos::shutdown() { finish_contexts(g_ceph_context, waiting_for_commit, -ECANCELED); finish_contexts(g_ceph_context, waiting_for_readable, -ECANCELED); finish_contexts(g_ceph_context, waiting_for_active, -ECANCELED); + finish_contexts(g_ceph_context, proposals, -ECANCELED); } void Paxos::leader_init() { cancel_events(); new_value.clear(); - if (!proposals.empty()) - proposals.clear(); - going_to_bootstrap = false; + finish_contexts(g_ceph_context, proposals, -EAGAIN); if (mon->get_quorum().size() == 1) { state = STATE_ACTIVE; @@ -1119,6 +1107,7 @@ void Paxos::peon_init() // no chance to write now! finish_contexts(g_ceph_context, waiting_for_writeable, -EAGAIN); finish_contexts(g_ceph_context, waiting_for_commit, -EAGAIN); + finish_contexts(g_ceph_context, proposals, -EAGAIN); } void Paxos::restart() @@ -1126,13 +1115,10 @@ void Paxos::restart() dout(10) << "restart -- canceling timeouts" << dendl; cancel_events(); new_value.clear(); - dout(10) << __func__ << " -- clearing queued proposals" << dendl; - if (!proposals.empty()) - proposals.clear(); state = STATE_RECOVERING; - going_to_bootstrap = false; + finish_contexts(g_ceph_context, proposals, -EAGAIN); finish_contexts(g_ceph_context, waiting_for_commit, -EAGAIN); finish_contexts(g_ceph_context, waiting_for_active, -EAGAIN); } diff --git a/src/mon/Paxos.h b/src/mon/Paxos.h index 2e1bb62dda9..160b02ecef2 100644 --- a/src/mon/Paxos.h +++ b/src/mon/Paxos.h @@ -530,7 +530,6 @@ private: * @} */ - bool going_to_bootstrap; /** * Should be true if we have proposed to trim, or are in the middle of * trimming; false otherwise. @@ -1017,7 +1016,6 @@ public: lease_timeout_event(0), accept_timeout_event(0), clock_drift_warned(0), - going_to_bootstrap(false), going_to_trim(false), trim_disabled_version(0) { } @@ -1025,9 +1023,6 @@ public: return paxos_name; } - bool is_bootstrapping() { return going_to_bootstrap; } - void prepare_bootstrap(); - void dispatch(PaxosServiceMessage *m); void reapply_all_versions(); diff --git a/src/mon/PaxosService.h b/src/mon/PaxosService.h index def0a85e7f6..2008dd6598f 100644 --- a/src/mon/PaxosService.h +++ b/src/mon/PaxosService.h @@ -498,8 +498,7 @@ public: */ bool is_active() { return (!is_proposing() && !paxos->is_recovering() - && !paxos->is_locked() - && !paxos->is_bootstrapping()); + && !paxos->is_locked()); } /** @@ -579,7 +578,7 @@ public: * @param c The callback to be awaken once we become active. */ void wait_for_active(Context *c) { - if (paxos->is_bootstrapping() || !is_proposing()) { + if (!is_proposing()) { paxos->wait_for_active(c); return; } @@ -612,7 +611,7 @@ public: * @param c The callback to be awaken once we become writeable. */ void wait_for_writeable(Context *c) { - if (paxos->is_bootstrapping() || !is_proposing()) { + if (!is_proposing()) { paxos->wait_for_writeable(c); return; } diff --git a/src/os/HashIndex.cc b/src/os/HashIndex.cc index 56b2c017d03..17b0f0388b9 100644 --- a/src/os/HashIndex.cc +++ b/src/os/HashIndex.cc @@ -368,21 +368,30 @@ int HashIndex::start_col_split(const vector<string> &path) { bufferlist bl; InProgressOp op_tag(InProgressOp::COL_SPLIT, path); op_tag.encode(bl); - return add_attr_path(vector<string>(), IN_PROGRESS_OP_TAG, bl); + int r = add_attr_path(vector<string>(), IN_PROGRESS_OP_TAG, bl); + if (r < 0) + return r; + return fsync_dir(vector<string>()); } int HashIndex::start_split(const vector<string> &path) { bufferlist bl; InProgressOp op_tag(InProgressOp::SPLIT, path); op_tag.encode(bl); - return add_attr_path(vector<string>(), IN_PROGRESS_OP_TAG, bl); + int r = add_attr_path(vector<string>(), IN_PROGRESS_OP_TAG, bl); + if (r < 0) + return r; + return fsync_dir(vector<string>()); } int HashIndex::start_merge(const vector<string> &path) { bufferlist bl; InProgressOp op_tag(InProgressOp::MERGE, path); op_tag.encode(bl); - return add_attr_path(vector<string>(), IN_PROGRESS_OP_TAG, bl); + int r = add_attr_path(vector<string>(), IN_PROGRESS_OP_TAG, bl); + if (r < 0) + return r; + return fsync_dir(vector<string>()); } int HashIndex::end_split_or_merge(const vector<string> &path) { diff --git a/src/osd/OSD.h b/src/osd/OSD.h index 428284c85ab..99d75dc40ad 100644 --- a/src/osd/OSD.h +++ b/src/osd/OSD.h @@ -1420,8 +1420,10 @@ protected: osd->scrub_queue.pop_front(); return pg; } - void _process(PG *pg) { - pg->scrub(); + void _process( + PG *pg, + ThreadPool::TPHandle &handle) { + pg->scrub(handle); pg->put("ScrubWQ"); } void _clear() { @@ -1505,7 +1507,9 @@ protected: rep_scrub_queue.pop_front(); return msg; } - void _process(MOSDRepScrub *msg) { + void _process( + MOSDRepScrub *msg, + ThreadPool::TPHandle &handle) { osd->osd_lock.Lock(); if (osd->is_stopping()) { osd->osd_lock.Unlock(); @@ -1514,7 +1518,7 @@ protected: if (osd->_have_pg(msg->pgid)) { PG *pg = osd->_lookup_lock_pg(msg->pgid); osd->osd_lock.Unlock(); - pg->replica_scrub(msg); + pg->replica_scrub(msg, handle); msg->put(); pg->unlock(); } else { diff --git a/src/osd/PG.cc b/src/osd/PG.cc index fdc5701bc87..da6a68ed387 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -3263,7 +3263,9 @@ void PG::sub_op_scrub_map(OpRequestRef op) /* * pg lock may or may not be held */ -void PG::_scan_list(ScrubMap &map, vector<hobject_t> &ls, bool deep) +void PG::_scan_list( + ScrubMap &map, vector<hobject_t> &ls, bool deep, + ThreadPool::TPHandle &handle) { dout(10) << "_scan_list scanning " << ls.size() << " objects" << (deep ? " deeply" : "") << dendl; @@ -3271,6 +3273,7 @@ void PG::_scan_list(ScrubMap &map, vector<hobject_t> &ls, bool deep) for (vector<hobject_t>::iterator p = ls.begin(); p != ls.end(); ++p, i++) { + handle.reset_tp_timeout(); hobject_t poid = *p; struct stat st; @@ -3290,6 +3293,7 @@ void PG::_scan_list(ScrubMap &map, vector<hobject_t> &ls, bool deep) while ( (r = osd->store->read(coll, poid, pos, g_conf->osd_deep_scrub_stride, bl, true)) > 0) { + handle.reset_tp_timeout(); h << bl; pos += bl.length(); bl.clear(); @@ -3319,7 +3323,14 @@ void PG::_scan_list(ScrubMap &map, vector<hobject_t> &ls, bool deep) ObjectMap::ObjectMapIterator iter = osd->store->get_omap_iterator( coll, poid); assert(iter); + uint64_t keys_scanned = 0; for (iter->seek_to_first(); iter->valid() ; iter->next()) { + if (g_conf->osd_scan_list_ping_tp_interval && + (keys_scanned % g_conf->osd_scan_list_ping_tp_interval == 0)) { + handle.reset_tp_timeout(); + } + ++keys_scanned; + dout(25) << "CRC key " << iter->key() << " value " << string(iter->value().c_str(), iter->value().length()) << dendl; @@ -3596,8 +3607,10 @@ void PG::_scan_snaps(ScrubMap &smap) * build a scrub map over a chunk without releasing the lock * only used by chunky scrub */ -int PG::build_scrub_map_chunk(ScrubMap &map, - hobject_t start, hobject_t end, bool deep) +int PG::build_scrub_map_chunk( + ScrubMap &map, + hobject_t start, hobject_t end, bool deep, + ThreadPool::TPHandle &handle) { dout(10) << "build_scrub_map" << dendl; dout(20) << "scrub_map_chunk [" << start << "," << end << ")" << dendl; @@ -3612,7 +3625,7 @@ int PG::build_scrub_map_chunk(ScrubMap &map, return ret; } - _scan_list(map, ls, deep); + _scan_list(map, ls, deep, handle); _scan_snaps(map); // pg attrs @@ -3629,7 +3642,7 @@ int PG::build_scrub_map_chunk(ScrubMap &map, * build a (sorted) summary of pg content for purposes of scrubbing * called while holding pg lock */ -void PG::build_scrub_map(ScrubMap &map) +void PG::build_scrub_map(ScrubMap &map, ThreadPool::TPHandle &handle) { dout(10) << "build_scrub_map" << dendl; @@ -3646,7 +3659,7 @@ void PG::build_scrub_map(ScrubMap &map) vector<hobject_t> ls; osd->store->collection_list(coll, ls); - _scan_list(map, ls, false); + _scan_list(map, ls, false, handle); lock(); _scan_snaps(map); @@ -3671,7 +3684,9 @@ void PG::build_scrub_map(ScrubMap &map) * build a summary of pg content changed starting after v * called while holding pg lock */ -void PG::build_inc_scrub_map(ScrubMap &map, eversion_t v) +void PG::build_inc_scrub_map( + ScrubMap &map, eversion_t v, + ThreadPool::TPHandle &handle) { map.valid_through = last_update_applied; map.incr_since = v; @@ -3695,7 +3710,7 @@ void PG::build_inc_scrub_map(ScrubMap &map, eversion_t v) } } - _scan_list(map, ls, false); + _scan_list(map, ls, false, handle); // pg attrs osd->store->collection_getattrs(coll, map.attrs); @@ -3743,7 +3758,9 @@ void PG::repair_object(const hobject_t& soid, ScrubMap::object *po, int bad_peer * for pushes to complete in case of recent recovery. Build a single * scrubmap of objects that are in the range [msg->start, msg->end). */ -void PG::replica_scrub(MOSDRepScrub *msg) +void PG::replica_scrub( + MOSDRepScrub *msg, + ThreadPool::TPHandle &handle) { assert(!scrubber.active_rep_scrub); dout(7) << "replica_scrub" << dendl; @@ -3777,7 +3794,9 @@ void PG::replica_scrub(MOSDRepScrub *msg) return; } - build_scrub_map_chunk(map, msg->start, msg->end, msg->deep); + build_scrub_map_chunk( + map, msg->start, msg->end, msg->deep, + handle); } else { if (msg->scrub_from > eversion_t()) { @@ -3792,10 +3811,10 @@ void PG::replica_scrub(MOSDRepScrub *msg) return; } } - build_inc_scrub_map(map, msg->scrub_from); + build_inc_scrub_map(map, msg->scrub_from, handle); scrubber.finalizing = 0; } else { - build_scrub_map(map); + build_scrub_map(map, handle); } if (msg->map_epoch < info.history.same_interval_since) { @@ -3823,7 +3842,7 @@ void PG::replica_scrub(MOSDRepScrub *msg) * scrub will be chunky if all OSDs in PG support chunky scrub * scrub will fall back to classic in any other case */ -void PG::scrub() +void PG::scrub(ThreadPool::TPHandle &handle) { lock(); if (deleting) { @@ -3868,9 +3887,9 @@ void PG::scrub() } if (scrubber.is_chunky) { - chunky_scrub(); + chunky_scrub(handle); } else { - classic_scrub(); + classic_scrub(handle); } unlock(); @@ -3915,7 +3934,7 @@ void PG::scrub() * Flag set when we're in the finalize stage. * */ -void PG::classic_scrub() +void PG::classic_scrub(ThreadPool::TPHandle &handle) { if (!scrubber.active) { dout(10) << "scrub start" << dendl; @@ -3946,7 +3965,7 @@ void PG::classic_scrub() // Unlocks and relocks... scrubber.primary_scrubmap = ScrubMap(); - build_scrub_map(scrubber.primary_scrubmap); + build_scrub_map(scrubber.primary_scrubmap, handle); if (scrubber.epoch_start != info.history.same_interval_since) { dout(10) << "scrub pg changed, aborting" << dendl; @@ -3993,7 +4012,7 @@ void PG::classic_scrub() if (scrubber.primary_scrubmap.valid_through != log.head) { ScrubMap incr; - build_inc_scrub_map(incr, scrubber.primary_scrubmap.valid_through); + build_inc_scrub_map(incr, scrubber.primary_scrubmap.valid_through, handle); scrubber.primary_scrubmap.merge_incr(incr); } @@ -4076,7 +4095,7 @@ void PG::classic_scrub() * scrubber.state encodes the current state of the scrub (refer to state diagram * for details). */ -void PG::chunky_scrub() +void PG::chunky_scrub(ThreadPool::TPHandle &handle) { // check for map changes if (scrubber.is_chunky_scrub_active()) { @@ -4209,7 +4228,8 @@ void PG::chunky_scrub() // build my own scrub map ret = build_scrub_map_chunk(scrubber.primary_scrubmap, scrubber.start, scrubber.end, - scrubber.deep); + scrubber.deep, + handle); if (ret < 0) { dout(5) << "error building scrub map: " << ret << ", aborting" << dendl; scrub_clear_state(); diff --git a/src/osd/PG.h b/src/osd/PG.h index b45379b32e1..8d8ad5c4c45 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -43,6 +43,7 @@ #include "messages/MOSDRepScrub.h" #include "messages/MOSDPGLog.h" #include "common/tracked_int_ptr.hpp" +#include "common/WorkQueue.h" #include <list> #include <memory> @@ -1030,24 +1031,29 @@ public: map<hobject_t, int> &authoritative, map<hobject_t, set<int> > &inconsistent_snapcolls, ostream &errorstream); - void scrub(); - void classic_scrub(); - void chunky_scrub(); + void scrub(ThreadPool::TPHandle &handle); + void classic_scrub(ThreadPool::TPHandle &handle); + void chunky_scrub(ThreadPool::TPHandle &handle); void scrub_compare_maps(); void scrub_process_inconsistent(); void scrub_finalize(); void scrub_finish(); void scrub_clear_state(); bool scrub_gather_replica_maps(); - void _scan_list(ScrubMap &map, vector<hobject_t> &ls, bool deep); + void _scan_list( + ScrubMap &map, vector<hobject_t> &ls, bool deep, + ThreadPool::TPHandle &handle); void _scan_snaps(ScrubMap &map); void _request_scrub_map_classic(int replica, eversion_t version); void _request_scrub_map(int replica, eversion_t version, hobject_t start, hobject_t end, bool deep); - int build_scrub_map_chunk(ScrubMap &map, - hobject_t start, hobject_t end, bool deep); - void build_scrub_map(ScrubMap &map); - void build_inc_scrub_map(ScrubMap &map, eversion_t v); + int build_scrub_map_chunk( + ScrubMap &map, + hobject_t start, hobject_t end, bool deep, + ThreadPool::TPHandle &handle); + void build_scrub_map(ScrubMap &map, ThreadPool::TPHandle &handle); + void build_inc_scrub_map( + ScrubMap &map, eversion_t v, ThreadPool::TPHandle &handle); virtual void _scrub(ScrubMap &map) { } virtual void _scrub_clear_state() { } virtual void _scrub_finish() { } @@ -1066,7 +1072,9 @@ public: void reg_next_scrub(); void unreg_next_scrub(); - void replica_scrub(class MOSDRepScrub *op); + void replica_scrub( + class MOSDRepScrub *op, + ThreadPool::TPHandle &handle); void sub_op_scrub_map(OpRequestRef op); void sub_op_scrub_reserve(OpRequestRef op); void sub_op_scrub_reserve_reply(OpRequestRef op); |