diff options
author | Samuel Just <sam.just@inktank.com> | 2013-04-19 17:11:29 -0700 |
---|---|---|
committer | Samuel Just <sam.just@inktank.com> | 2013-04-19 17:11:31 -0700 |
commit | 481c532ff361b21e044621ac13c8f00ebfb1b3dc (patch) | |
tree | 5ff54d892cb0639fa27b7801fbe027bcb30bbaba | |
parent | ad845e61b9c0e19915de527e09ec1f8747b92df8 (diff) | |
parent | 88d9ee1d01b1c7835c1c83959c89d2153a56454d (diff) | |
download | ceph-481c532ff361b21e044621ac13c8f00ebfb1b3dc.tar.gz |
Merge branch 'wip_4662_clean' into next
Reviewed-by: Greg Farnum <greg@inktank.com>
-rw-r--r-- | configure.ac | 6 | ||||
-rw-r--r-- | src/Makefile.am | 1 | ||||
-rw-r--r-- | src/common/AsyncReserver.h | 1 | ||||
-rw-r--r-- | src/common/tracked_int_ptr.hpp | 67 | ||||
-rw-r--r-- | src/os/FileStore.cc | 16 | ||||
-rw-r--r-- | src/osd/OSD.cc | 58 | ||||
-rw-r--r-- | src/osd/OSD.h | 79 | ||||
-rw-r--r-- | src/osd/OpRequest.cc | 3 | ||||
-rw-r--r-- | src/osd/PG.cc | 115 | ||||
-rw-r--r-- | src/osd/PG.h | 45 | ||||
-rw-r--r-- | src/osd/ReplicatedPG.cc | 38 | ||||
-rw-r--r-- | src/osd/ReplicatedPG.h | 23 |
12 files changed, 343 insertions, 109 deletions
diff --git a/configure.ac b/configure.ac index 23e21133e6a..2661d1c456e 100644 --- a/configure.ac +++ b/configure.ac @@ -268,6 +268,12 @@ AS_IF([test "x$with_tcmalloc" != xno], [no tcmalloc found (use --without-tcmalloc to disable)])])]) AM_CONDITIONAL(WITH_TCMALLOC, [test "$HAVE_LIBTCMALLOC" = "1"]) +#set pg ref debugging? +AC_ARG_ENABLE([pgrefdebugging], + [AS_HELP_STRING([--enable-pgrefdebugging], [enable pg ref debugging])], + [AC_DEFINE([PG_DEBUG_REFS], [1], [Defined if you want pg ref debugging])], + []) + # # Java is painful # - adapted from OMPI wrappers package diff --git a/src/Makefile.am b/src/Makefile.am index d528b78a1be..cd360fcd147 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1601,6 +1601,7 @@ noinst_HEADERS = \ common/admin_socket.h \ common/admin_socket_client.h \ common/shared_cache.hpp \ + common/tracked_int_ptr.hpp \ common/simple_cache.hpp \ common/sharedptr_registry.hpp \ common/map_cacher.hpp \ diff --git a/src/common/AsyncReserver.h b/src/common/AsyncReserver.h index 638bfb3a1b1..582b3beef7a 100644 --- a/src/common/AsyncReserver.h +++ b/src/common/AsyncReserver.h @@ -90,6 +90,7 @@ public: ) { Mutex::Locker l(lock); if (queue_pointers.count(item)) { + delete queue_pointers[item]->second; queue.erase(queue_pointers[item]); queue_pointers.erase(item); } else { diff --git a/src/common/tracked_int_ptr.hpp b/src/common/tracked_int_ptr.hpp new file mode 100644 index 00000000000..ba0900db6bd --- /dev/null +++ b/src/common/tracked_int_ptr.hpp @@ -0,0 +1,67 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2013 Inktank Storage, Inc. + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#ifndef CEPH_TRACKEDINTPTR_H +#define CEPH_TRACKEDINTPTR_H + +#include <map> +#include <list> +#include <memory> +#include <utility> +#include "common/Mutex.h" +#include "common/Cond.h" + +template <class T> +class TrackedIntPtr { + T *ptr; + uint64_t id; +public: + TrackedIntPtr() : ptr(NULL), id(0) {} + TrackedIntPtr(T *ptr) : ptr(ptr), id(ptr ? get_with_id(ptr) : 0) {} + ~TrackedIntPtr() { + if (ptr) + put_with_id(ptr, id); + else + assert(id == 0); + } + void swap(TrackedIntPtr &other) { + T *optr = other.ptr; + uint64_t oid = other.id; + other.ptr = ptr; + other.id = id; + ptr = optr; + id = oid; + } + TrackedIntPtr(const TrackedIntPtr &rhs) : + ptr(rhs.ptr), id(ptr ? get_with_id(ptr) : 0) {} + + void operator=(const TrackedIntPtr &rhs) { + TrackedIntPtr o(rhs.ptr); + swap(o); + } + T &operator*() { + return *ptr; + } + T *operator->() { + return ptr; + } + bool operator<(const TrackedIntPtr &lhs) const { + return ptr < lhs.ptr; + } + bool operator==(const TrackedIntPtr &lhs) const { + return ptr == lhs.ptr; + } +}; + +#endif diff --git a/src/os/FileStore.cc b/src/os/FileStore.cc index 5170412183e..16ae21a700c 100644 --- a/src/os/FileStore.cc +++ b/src/os/FileStore.cc @@ -3145,7 +3145,7 @@ int FileStore::_do_clone_range(int from, int to, uint64_t srcoff, uint64_t len, if (err >= 0) { r += err; } else { - return -errno; + return err; } } @@ -3158,7 +3158,7 @@ int FileStore::_do_clone_range(int from, int to, uint64_t srcoff, uint64_t len, if (err >= 0) { r += err; } else { - return -errno; + return err; } } dout(20) << "_do_clone_range finished " << srcoff << "~" << len @@ -3194,10 +3194,14 @@ int FileStore::_do_copy_range(int from, int to, uint64_t srcoff, uint64_t len, u r = ::read(from, buf, l); dout(25) << " read from " << pos << "~" << l << " got " << r << dendl; if (r < 0) { - r = -errno; - derr << "FileStore::_do_copy_range: read error at " << pos << "~" << len - << ", " << cpp_strerror(r) << dendl; - break; + if (errno == EINTR) { + continue; + } else { + r = -errno; + derr << "FileStore::_do_copy_range: read error at " << pos << "~" << len + << ", " << cpp_strerror(r) << dendl; + break; + } } if (r == 0) { // hrm, bad source range, wtf. diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index ba502e6112d..5e335422579 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -189,6 +189,9 @@ OSDService::OSDService(OSD *osd) : cur_ratio(0), is_stopping_lock("OSDService::is_stopping_lock"), state(NOT_STOPPING) +#ifdef PG_DEBUG_REFS + , pgid_lock("OSDService::pgid_lock") +#endif {} void OSDService::_start_split(const set<pg_t> &pgs) @@ -1217,7 +1220,7 @@ int OSD::shutdown() g_ceph_context->_conf->set_val("debug_ms", "100"); g_ceph_context->_conf->apply_changes(NULL); - // Remove PGs + // Shutdown PGs for (hash_map<pg_t, PG*>::iterator p = pg_map.begin(); p != pg_map.end(); ++p) { @@ -1227,9 +1230,7 @@ int OSD::shutdown() p->second->kick(); p->second->unlock(); p->second->osr->flush(); - p->second->put(); } - pg_map.clear(); // finish ops op_wq.drain(); // should already be empty except for lagard PGs @@ -1308,6 +1309,28 @@ int OSD::shutdown() assert(pg_stat_queue.empty()); } + peering_wq.clear(); + // Remove PGs +#ifdef PG_DEBUG_REFS + service.dump_live_pgids(); +#endif + for (hash_map<pg_t, PG*>::iterator p = pg_map.begin(); + p != pg_map.end(); + ++p) { + dout(20) << " kicking pg " << p->first << dendl; + p->second->lock(); + if (p->second->ref.read() != 1) { + derr << "pgid " << p->first << " has ref count of " + << p->second->ref.read() << dendl; + assert(0); + } + p->second->unlock(); + p->second->put("PGMap"); + } + pg_map.clear(); +#ifdef PG_DEBUG_REFS + service.dump_live_pgids(); +#endif g_conf->remove_observer(this); monc->shutdown(); @@ -1321,6 +1344,7 @@ int OSD::shutdown() cluster_messenger->shutdown(); hbclient_messenger->shutdown(); hbserver_messenger->shutdown(); + peering_wq.clear(); return r; } @@ -1440,7 +1464,7 @@ PG *OSD::_open_lock_pg( pg->lock_with_map_lock_held(no_lockdep_check); else pg->lock(no_lockdep_check); - pg->get(); // because it's in pg_map + pg->get("PGMap"); // because it's in pg_map return pg; } @@ -1467,7 +1491,7 @@ PG* OSD::_make_pg( void OSD::add_newly_split_pg(PG *pg, PG::RecoveryCtx *rctx) { epoch_t e(service.get_osdmap()->get_epoch()); - pg->get(); // For pg_map + pg->get("PGMap"); // For pg_map pg_map[pg->info.pgid] = pg; dout(10) << "Adding newly split pg " << *pg << dendl; vector<int> up, acting; @@ -2981,7 +3005,7 @@ void OSD::send_pg_stats(const utime_t &now) ++p; if (!pg->is_primary()) { // we hold map_lock; role is stable. pg->stat_queue_item.remove_myself(); - pg->put(); + pg->put("pg_stat_queue"); continue; } pg->pg_stats_lock.Lock(); @@ -3025,7 +3049,7 @@ void OSD::handle_pg_stats_ack(MPGStatsAck *ack) xlist<PG*>::iterator p = pg_stat_queue.begin(); while (!p.end()) { PG *pg = *p; - pg->get(); + PGRef _pg(pg); ++p; if (ack->pg_stat.count(pg->info.pgid)) { @@ -3034,7 +3058,7 @@ void OSD::handle_pg_stats_ack(MPGStatsAck *ack) if (acked == pg->pg_stats_stable.reported) { dout(25) << " ack on " << pg->info.pgid << " " << pg->pg_stats_stable.reported << dendl; pg->stat_queue_item.remove_myself(); - pg->put(); + pg->put("pg_stat_queue"); } else { dout(25) << " still pending " << pg->info.pgid << " " << pg->pg_stats_stable.reported << " > acked " << acked << dendl; @@ -3043,7 +3067,6 @@ void OSD::handle_pg_stats_ack(MPGStatsAck *ack) } else { dout(30) << " still pending " << pg->info.pgid << " " << pg->pg_stats_stable.reported << dendl; } - pg->put(); } if (!pg_stat_queue.size()) { @@ -4455,7 +4478,7 @@ void OSD::consume_map() dout(7) << "consume_map version " << osdmap->get_epoch() << dendl; int num_pg_primary = 0, num_pg_replica = 0, num_pg_stray = 0; - list<PG*> to_remove; + list<PGRef> to_remove; // scan pg's for (hash_map<pg_t,PG*>::iterator it = pg_map.begin(); @@ -4473,8 +4496,7 @@ void OSD::consume_map() set<pg_t> split_pgs; if (!osdmap->have_pg_pool(pg->info.pgid.pool())) { //pool is deleted! - pg->get(); - to_remove.push_back(pg); + to_remove.push_back(PGRef(pg)); } else if (it->first.is_split( service.get_osdmap()->get_pg_num(it->first.pool()), osdmap->get_pg_num(it->first.pool()), @@ -4485,13 +4507,12 @@ void OSD::consume_map() pg->unlock(); } - for (list<PG*>::iterator i = to_remove.begin(); + for (list<PGRef>::iterator i = to_remove.begin(); i != to_remove.end(); - ++i) { + to_remove.erase(i++)) { (*i)->lock(); - _remove_pg((*i)); + _remove_pg(&**i); (*i)->unlock(); - (*i)->put(); } to_remove.clear(); @@ -5791,10 +5812,9 @@ void OSD::handle_pg_remove(OpRequestRef op) up, acting); if (history.same_interval_since <= m->get_epoch()) { assert(pg->get_primary() == m->get_source().num()); - pg->get(); + PGRef _pg(pg); _remove_pg(pg); pg->unlock(); - pg->put(); } else { dout(10) << *pg << " ignoring remove request, pg changed in epoch " << history.same_interval_since @@ -5854,7 +5874,7 @@ void OSD::_remove_pg(PG *pg) // remove from map pg_map.erase(pg->info.pgid); - pg->put(); // since we've taken it out of map + pg->put("PGMap"); // since we've taken it out of map } diff --git a/src/osd/OSD.h b/src/osd/OSD.h index 5166ae74aa4..8a74cd8b630 100644 --- a/src/osd/OSD.h +++ b/src/osd/OSD.h @@ -427,6 +427,41 @@ public: bool prepare_to_stop(); void got_stop_ack(); + +#ifdef PG_DEBUG_REFS + Mutex pgid_lock; + map<pg_t, int> pgid_tracker; + map<pg_t, PG*> live_pgs; + void add_pgid(pg_t pgid, PG *pg) { + Mutex::Locker l(pgid_lock); + if (!pgid_tracker.count(pgid)) { + pgid_tracker[pgid] = 0; + live_pgs[pgid] = pg; + } + pgid_tracker[pgid]++; + } + void remove_pgid(pg_t pgid, PG *pg) { + Mutex::Locker l(pgid_lock); + assert(pgid_tracker.count(pgid)); + assert(pgid_tracker[pgid] > 0); + pgid_tracker[pgid]--; + if (pgid_tracker[pgid] == 0) { + pgid_tracker.erase(pgid); + live_pgs.erase(pgid); + } + } + void dump_live_pgids() { + Mutex::Locker l(pgid_lock); + derr << "live pgids:" << dendl; + for (map<pg_t, int>::iterator i = pgid_tracker.begin(); + i != pgid_tracker.end(); + ++i) { + derr << "\t" << *i << dendl; + live_pgs[i->first]->dump_live_ids(); + } + } +#endif + OSDService(OSD *osd); }; class OSD : public Dispatcher, @@ -759,14 +794,14 @@ private: ) { if (*i == pg) { peering_queue.erase(i++); - pg->put(); + pg->put("PeeringWQ"); } else { ++i; } } } bool _enqueue(PG *pg) { - pg->get(); + pg->get("PeeringWQ"); peering_queue.push_back(pg); return true; } @@ -795,7 +830,7 @@ private: for (list<PG *>::const_iterator i = pgs.begin(); i != pgs.end(); ++i) { - (*i)->put(); + (*i)->put("PeeringWQ"); } } void _process_finish(const list<PG *> &pgs) { @@ -1019,7 +1054,7 @@ protected: void pg_stat_queue_enqueue(PG *pg) { pg_stat_queue_lock.Lock(); if (pg->is_primary() && !pg->stat_queue_item.is_on_list()) { - pg->get(); + pg->get("pg_stat_queue"); pg_stat_queue.push_back(&pg->stat_queue_item); } osd_stat_updated = true; @@ -1028,7 +1063,7 @@ protected: void pg_stat_queue_dequeue(PG *pg) { pg_stat_queue_lock.Lock(); if (pg->stat_queue_item.remove_myself()) - pg->put(); + pg->put("pg_stat_queue"); pg_stat_queue_lock.Unlock(); } void clear_pg_stat_queue() { @@ -1036,7 +1071,7 @@ protected: while (!pg_stat_queue.empty()) { PG *pg = pg_stat_queue.front(); pg_stat_queue.pop_front(); - pg->put(); + pg->put("pg_stat_queue"); } pg_stat_queue_lock.Unlock(); } @@ -1159,7 +1194,7 @@ protected: } bool _enqueue(PG *pg) { if (!pg->recovery_item.is_on_list()) { - pg->get(); + pg->get("RecoveryWQ"); osd->recovery_queue.push_back(&pg->recovery_item); if (g_conf->osd_recovery_delay_start > 0) { @@ -1172,7 +1207,7 @@ protected: } void _dequeue(PG *pg) { if (pg->recovery_item.remove_myself()) - pg->put(); + pg->put("RecoveryWQ"); } PG *_dequeue() { if (osd->recovery_queue.empty()) @@ -1187,19 +1222,19 @@ protected: } void _queue_front(PG *pg) { if (!pg->recovery_item.is_on_list()) { - pg->get(); + pg->get("RecoveryWQ"); osd->recovery_queue.push_front(&pg->recovery_item); } } void _process(PG *pg) { osd->do_recovery(pg); - pg->put(); + pg->put("RecoveryWQ"); } void _clear() { while (!osd->recovery_queue.empty()) { PG *pg = osd->recovery_queue.front(); osd->recovery_queue.pop_front(); - pg->put(); + pg->put("RecoveryWQ"); } } } recovery_wq; @@ -1231,13 +1266,13 @@ protected: bool _enqueue(PG *pg) { if (pg->snap_trim_item.is_on_list()) return false; - pg->get(); + pg->get("SnapTrimWQ"); osd->snap_trim_queue.push_back(&pg->snap_trim_item); return true; } void _dequeue(PG *pg) { if (pg->snap_trim_item.remove_myself()) - pg->put(); + pg->put("SnapTrimWQ"); } PG *_dequeue() { if (osd->snap_trim_queue.empty()) @@ -1248,7 +1283,7 @@ protected: } void _process(PG *pg) { pg->snap_trimmer(); - pg->put(); + pg->put("SnapTrimWQ"); } void _clear() { osd->snap_trim_queue.clear(); @@ -1275,13 +1310,13 @@ protected: if (pg->scrub_item.is_on_list()) { return false; } - pg->get(); + pg->get("ScrubWQ"); osd->scrub_queue.push_back(&pg->scrub_item); return true; } void _dequeue(PG *pg) { if (pg->scrub_item.remove_myself()) { - pg->put(); + pg->put("ScrubWQ"); } } PG *_dequeue() { @@ -1293,13 +1328,13 @@ protected: } void _process(PG *pg) { pg->scrub(); - pg->put(); + pg->put("ScrubWQ"); } void _clear() { while (!osd->scrub_queue.empty()) { PG *pg = osd->scrub_queue.front(); osd->scrub_queue.pop_front(); - pg->put(); + pg->put("ScrubWQ"); } } } scrub_wq; @@ -1320,13 +1355,13 @@ protected: if (pg->scrub_finalize_item.is_on_list()) { return false; } - pg->get(); + pg->get("ScrubFinalizeWQ"); scrub_finalize_queue.push_back(&pg->scrub_finalize_item); return true; } void _dequeue(PG *pg) { if (pg->scrub_finalize_item.remove_myself()) { - pg->put(); + pg->put("ScrubFinalizeWQ"); } } PG *_dequeue() { @@ -1338,13 +1373,13 @@ protected: } void _process(PG *pg) { pg->scrub_finalize(); - pg->put(); + pg->put("ScrubFinalizeWQ"); } void _clear() { while (!scrub_finalize_queue.empty()) { PG *pg = scrub_finalize_queue.front(); scrub_finalize_queue.pop_front(); - pg->put(); + pg->put("ScrubFinalizeWQ"); } } } scrub_finalize_wq; diff --git a/src/osd/OpRequest.cc b/src/osd/OpRequest.cc index d5ce8bbc749..ea9beaacda3 100644 --- a/src/osd/OpRequest.cc +++ b/src/osd/OpRequest.cc @@ -29,7 +29,8 @@ void OpHistory::on_shutdown() void OpHistory::insert(utime_t now, OpRequestRef op) { - assert(!shutdown); + if (shutdown) + return; duration.insert(make_pair(op->get_duration(), op)); arrived.insert(make_pair(op->get_arrived(), op)); cleanup(now); diff --git a/src/osd/PG.cc b/src/osd/PG.cc index f21882966da..6f157eb6680 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -33,6 +33,7 @@ #include "messages/MOSDSubOp.h" #include "messages/MOSDSubOpReply.h" +#include "common/BackTrace.h" #include <sstream> @@ -43,6 +44,73 @@ static ostream& _prefix(std::ostream *_dout, const PG *pg) { return *_dout << pg->gen_prefix(); } +void PG::get(const string &tag) { + ref.inc(); +#ifdef PG_DEBUG_REFS + Mutex::Locker l(_ref_id_lock); + if (!_tag_counts.count(tag)) { + _tag_counts[tag] = 0; + } + _tag_counts[tag]++; +#endif +} +void PG::put(const string &tag) { +#ifdef PG_DEBUG_REFS + { + Mutex::Locker l(_ref_id_lock); + assert(_tag_counts.count(tag)); + _tag_counts[tag]--; + if (_tag_counts[tag] == 0) { + _tag_counts.erase(tag); + } + } +#endif + if (ref.dec() == 0) + delete this; +} + +#ifdef PG_DEBUG_REFS +uint64_t PG::get_with_id() { + ref.inc(); + Mutex::Locker l(_ref_id_lock); + uint64_t id = ++_ref_id; + BackTrace bt(0); + stringstream ss; + bt.print(ss); + dout(20) << __func__ << ": " << info.pgid << " got id " << id << dendl; + assert(!_live_ids.count(id)); + _live_ids.insert(make_pair(id, ss.str())); + return id; +} + +void PG::put_with_id(uint64_t id) { + dout(20) << __func__ << ": " << info.pgid << " put id " << id << dendl; + { + Mutex::Locker l(_ref_id_lock); + assert(_live_ids.count(id)); + _live_ids.erase(id); + } + if (ref.dec() == 0) + delete this; +} + +void PG::dump_live_ids() { + Mutex::Locker l(_ref_id_lock); + dout(0) << "\t" << __func__ << ": " << info.pgid << " live ids:" << dendl; + for (map<uint64_t, string>::iterator i = _live_ids.begin(); + i != _live_ids.end(); + ++i) { + dout(0) << "\t\tid: " << *i << dendl; + } + dout(0) << "\t" << __func__ << ": " << info.pgid << " live tags:" << dendl; + for (map<string, uint64_t>::iterator i = _tag_counts.begin(); + i != _tag_counts.end(); + ++i) { + dout(0) << "\t\tid: " << *i << dendl; + } +} +#endif + void PGPool::update(OSDMapRef map) { const pg_pool_t *pi = map->get_pg_pool(id); @@ -72,7 +140,11 @@ PG::PG(OSDService *o, OSDMapRef curmap, _pool.id), osdmap_ref(curmap), pool(_pool), _lock("PG::_lock"), - ref(0), deleting(false), dirty_info(false), dirty_big_info(false), dirty_log(false), + ref(0), + #ifdef PG_DEBUG_REFS + _ref_id_lock("PG::_ref_id_lock"), _ref_id(0), + #endif + deleting(false), dirty_info(false), dirty_big_info(false), dirty_log(false), info(p), info_struct_v(0), coll(p), log_oid(loid), biginfo_oid(ioid), @@ -98,10 +170,16 @@ PG::PG(OSDService *o, OSDMapRef curmap, active_pushes(0), recovery_state(this) { +#ifdef PG_DEBUG_REFS + osd->add_pgid(p, this); +#endif } PG::~PG() { +#ifdef PG_DEBUG_REFS + osd->remove_pgid(info.pgid, this); +#endif } void PG::lock(bool no_lockdep) @@ -1381,7 +1459,7 @@ void PG::build_might_have_unfound() } struct C_PG_ActivateCommitted : public Context { - PG *pg; + PGRef pg; epoch_t epoch; C_PG_ActivateCommitted(PG *p, epoch_t e) : pg(p), epoch(e) {} @@ -1452,7 +1530,6 @@ void PG::activate(ObjectStore::Transaction& t, clean_up_local(t); // find out when we commit - get(); // for callback tfin.push_back(new C_PG_ActivateCommitted(this, query_epoch)); // initialize snap_trimq @@ -1803,7 +1880,6 @@ void PG::_activate_committed(epoch_t e) } unlock(); - put(); } /* @@ -1860,10 +1936,8 @@ bool PG::queue_scrub() } struct C_PG_FinishRecovery : public Context { - PG *pg; - C_PG_FinishRecovery(PG *p) : pg(p) { - pg->get(); - } + PGRef pg; + C_PG_FinishRecovery(PG *p) : pg(p) {} void finish(int r) { pg->_finish_recovery(this); } @@ -1906,6 +1980,10 @@ void PG::finish_recovery(list<Context*>& tfin) void PG::_finish_recovery(Context *c) { lock(); + if (deleting) { + unlock(); + return; + } if (c == finish_sync_event) { dout(10) << "_finish_recovery" << dendl; finish_sync_event = 0; @@ -1922,7 +2000,6 @@ void PG::_finish_recovery(Context *c) dout(10) << "_finish_recovery -- stale" << dendl; } unlock(); - put(); } void PG::start_recovery_op(const hobject_t& soid) @@ -3689,7 +3766,6 @@ void PG::scrub() lock(); if (deleting) { unlock(); - put(); return; } @@ -4820,16 +4896,14 @@ void PG::set_last_peering_reset() } struct FlushState { - PG *pg; + PGRef pg; epoch_t epoch; - FlushState(PG *pg, epoch_t epoch) : pg(pg), epoch(epoch) { - pg->get(); - } + FlushState(PG *pg, epoch_t epoch) : pg(pg), epoch(epoch) {} ~FlushState() { pg->lock(); - pg->queue_flushed(epoch); + if (!pg->pg_has_reset_since(epoch)) + pg->queue_flushed(epoch); pg->unlock(); - pg->put(); } }; typedef std::tr1::shared_ptr<FlushState> FlushStateRef; @@ -7639,5 +7713,10 @@ bool PG::PriorSet::affected_by_map(const OSDMapRef osdmap, const PG *debug_pg) c return false; } -void intrusive_ptr_add_ref(PG *pg) { pg->get(); } -void intrusive_ptr_release(PG *pg) { pg->put(); } +void intrusive_ptr_add_ref(PG *pg) { pg->get("intptr"); } +void intrusive_ptr_release(PG *pg) { pg->put("intptr"); } + +#ifdef PG_DEBUG_REFS + uint64_t get_with_id(PG *pg) { return pg->get_with_id(); } + void put_with_id(PG *pg, uint64_t id) { return pg->put_with_id(id); } +#endif diff --git a/src/osd/PG.h b/src/osd/PG.h index f437bafbb16..3ac51b87b94 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -42,6 +42,7 @@ #include "msg/Messenger.h" #include "messages/MOSDRepScrub.h" #include "messages/MOSDPGLog.h" +#include "common/tracked_int_ptr.hpp" #include <list> #include <memory> @@ -64,6 +65,18 @@ class MOSDPGScan; class MOSDPGBackfill; class MOSDPGInfo; +class PG; + +void intrusive_ptr_add_ref(PG *pg); +void intrusive_ptr_release(PG *pg); + +#ifdef PG_DEBUG_REFS + uint64_t get_with_id(PG *pg); + void put_with_id(PG *pg, uint64_t id); + typedef TrackedIntPtr<PG> PGRef; +#else + typedef boost::intrusive_ptr<PG> PGRef; +#endif struct PGRecoveryStats { struct per_state_info { @@ -388,6 +401,13 @@ protected: Cond _cond; atomic_t ref; +#ifdef PG_DEBUG_REFS + Mutex _ref_id_lock; + map<uint64_t, string> _live_ids; + map<string, uint64_t> _tag_counts; + uint64_t _ref_id; +#endif + public: bool deleting; // true while in removing or OSD is shutting down @@ -422,17 +442,13 @@ public: _cond.Signal(); } - void get() { - //generic_dout(0) << this << " " << info.pgid << " get " << ref.test() << dendl; - //assert(_lock.is_locked()); - ref.inc(); - } - void put() { - //generic_dout(0) << this << " " << info.pgid << " put " << ref.test() << dendl; - if (ref.dec() == 0) - delete this; - } - +#ifdef PG_DEBUG_REFS + uint64_t get_with_id(); + void put_with_id(uint64_t); + void dump_live_ids(); +#endif + void get(const string &tag); + void put(const string &tag); bool dirty_info, dirty_big_info, dirty_log; @@ -1057,7 +1073,7 @@ public: template <class EVT> struct QueuePeeringEvt : Context { - boost::intrusive_ptr<PG> pg; + PGRef pg; epoch_t epoch; EVT evt; QueuePeeringEvt(PG *pg, epoch_t epoch, EVT evt) : @@ -1967,9 +1983,4 @@ WRITE_CLASS_ENCODER(PG::OndiskLog) ostream& operator<<(ostream& out, const PG& pg); -void intrusive_ptr_add_ref(PG *pg); -void intrusive_ptr_release(PG *pg); - -typedef boost::intrusive_ptr<PG> PGRef; - #endif diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index 50a747e330c..90f6bf5c18e 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -3696,33 +3696,29 @@ int ReplicatedPG::prepare_transaction(OpContext *ctx) class C_OSD_OpApplied : public Context { public: - ReplicatedPG *pg; + ReplicatedPGRef pg; ReplicatedPG::RepGather *repop; C_OSD_OpApplied(ReplicatedPG *p, ReplicatedPG::RepGather *rg) : pg(p), repop(rg) { repop->get(); - pg->get(); // we're copying the pointer } void finish(int r) { pg->op_applied(repop); - pg->put(); } }; class C_OSD_OpCommit : public Context { public: - ReplicatedPG *pg; + ReplicatedPGRef pg; ReplicatedPG::RepGather *repop; C_OSD_OpCommit(ReplicatedPG *p, ReplicatedPG::RepGather *rg) : pg(p), repop(rg) { repop->get(); - pg->get(); // we're copying the pointer } void finish(int r) { pg->op_commit(repop); - pg->put(); } }; @@ -4631,7 +4627,7 @@ void ReplicatedPG::sub_op_modify(OpRequestRef op) RepModify *rm = new RepModify; rm->pg = this; - get(); + get("RepModify"); rm->op = op; rm->ctx = 0; rm->ackerosd = ackerosd; @@ -4764,7 +4760,7 @@ void ReplicatedPG::sub_op_modify_applied(RepModify *rm) if (done) { delete rm->ctx; delete rm; - put(); + put("RepModify"); } } @@ -4799,7 +4795,7 @@ void ReplicatedPG::sub_op_modify_commit(RepModify *rm) if (done) { delete rm->ctx; delete rm; - put(); + put("RepModify"); } } @@ -5845,7 +5841,8 @@ void ReplicatedPG::_applied_recovered_object(ObjectStore::Transaction *t, Object --active_pushes; // requeue an active chunky scrub waiting on recovery ops - if (active_pushes == 0 && scrubber.is_chunky_scrub_active()) { + if (!deleting && active_pushes == 0 + && scrubber.is_chunky_scrub_active()) { osd->scrub_wq.queue(this); } @@ -5862,7 +5859,7 @@ void ReplicatedPG::_applied_recovered_object_replica(ObjectStore::Transaction *t --active_pushes; // requeue an active chunky scrub waiting on recovery ops - if (active_pushes == 0 && + if (!deleting && active_pushes == 0 && scrubber.active_rep_scrub && scrubber.active_rep_scrub->chunky) { osd->rep_scrub_wq.queue(scrubber.active_rep_scrub); scrubber.active_rep_scrub = 0; @@ -6069,14 +6066,11 @@ ObjectContext *ReplicatedPG::mark_object_lost(ObjectStore::Transaction *t, } struct C_PG_MarkUnfoundLost : public Context { - ReplicatedPG *pg; + ReplicatedPGRef pg; list<ObjectContext*> obcs; - C_PG_MarkUnfoundLost(ReplicatedPG *p) : pg(p) { - pg->get(); - } + C_PG_MarkUnfoundLost(ReplicatedPG *p) : pg(p) {} void finish(int r) { pg->_finish_mark_all_unfound_lost(obcs); - pg->put(); } }; @@ -6186,7 +6180,8 @@ void ReplicatedPG::_finish_mark_all_unfound_lost(list<ObjectContext*>& obcs) lock(); dout(10) << "_finish_mark_all_unfound_lost " << dendl; - requeue_ops(waiting_for_all_missing); + if (!deleting) + requeue_ops(waiting_for_all_missing); waiting_for_all_missing.clear(); while (!obcs.empty()) { @@ -7456,5 +7451,10 @@ boost::statechart::result ReplicatedPG::WaitingOnReplicas::react(const SnapTrim& return transit< NotTrimming >(); } -void intrusive_ptr_add_ref(ReplicatedPG *pg) { pg->get(); } -void intrusive_ptr_release(ReplicatedPG *pg) { pg->put(); } +void intrusive_ptr_add_ref(ReplicatedPG *pg) { pg->get("intptr"); } +void intrusive_ptr_release(ReplicatedPG *pg) { pg->put("intptr"); } + +#ifdef PG_DEBUG_REFS +uint64_t get_with_id(ReplicatedPG *pg) { return pg->get_with_id(); } +void put_with_id(ReplicatedPG *pg, uint64_t id) { return pg->put_with_id(id); } +#endif diff --git a/src/osd/ReplicatedPG.h b/src/osd/ReplicatedPG.h index 377ca8656a7..2ef2627ba70 100644 --- a/src/osd/ReplicatedPG.h +++ b/src/osd/ReplicatedPG.h @@ -28,6 +28,18 @@ #include "messages/MOSDSubOp.h" class MOSDSubOpReply; +class ReplicatedPG; +void intrusive_ptr_add_ref(ReplicatedPG *pg); +void intrusive_ptr_release(ReplicatedPG *pg); +uint64_t get_with_id(ReplicatedPG *pg); +void put_with_id(ReplicatedPG *pg, uint64_t id); + +#ifdef PG_DEBUG_REFS + typedef TrackedIntPtr<ReplicatedPG> ReplicatedPGRef; +#else + typedef boost::intrusive_ptr<ReplicatedPG> ReplicatedPGRef; +#endif + class PGLSFilter { protected: string xattr; @@ -783,7 +795,7 @@ protected: } }; struct C_OSD_AppliedRecoveredObject : public Context { - boost::intrusive_ptr<ReplicatedPG> pg; + ReplicatedPGRef pg; ObjectStore::Transaction *t; ObjectContext *obc; C_OSD_AppliedRecoveredObject(ReplicatedPG *p, ObjectStore::Transaction *tt, ObjectContext *o) : @@ -793,7 +805,7 @@ protected: } }; struct C_OSD_CommittedPushedObject : public Context { - boost::intrusive_ptr<ReplicatedPG> pg; + ReplicatedPGRef pg; OpRequestRef op; epoch_t epoch; eversion_t last_complete; @@ -818,7 +830,7 @@ protected: } }; struct C_OSD_CompletedPull : public Context { - boost::intrusive_ptr<ReplicatedPG> pg; + ReplicatedPGRef pg; hobject_t hoid; epoch_t epoch; C_OSD_CompletedPull( @@ -835,7 +847,7 @@ protected: }; friend class C_OSD_CompletedPull; struct C_OSD_AppliedRecoveredObjectReplica : public Context { - boost::intrusive_ptr<ReplicatedPG> pg; + ReplicatedPGRef pg; ObjectStore::Transaction *t; C_OSD_AppliedRecoveredObjectReplica(ReplicatedPG *p, ObjectStore::Transaction *tt) : pg(p), t(tt) {} @@ -1021,7 +1033,4 @@ inline ostream& operator<<(ostream& out, ReplicatedPG::AccessMode& mode) return out; } -void intrusive_ptr_add_ref(ReplicatedPG *pg); -void intrusive_ptr_release(ReplicatedPG *pg); - #endif |