summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSamuel Just <sam.just@inktank.com>2013-04-19 17:11:29 -0700
committerSamuel Just <sam.just@inktank.com>2013-04-19 17:11:31 -0700
commit481c532ff361b21e044621ac13c8f00ebfb1b3dc (patch)
tree5ff54d892cb0639fa27b7801fbe027bcb30bbaba
parentad845e61b9c0e19915de527e09ec1f8747b92df8 (diff)
parent88d9ee1d01b1c7835c1c83959c89d2153a56454d (diff)
downloadceph-481c532ff361b21e044621ac13c8f00ebfb1b3dc.tar.gz
Merge branch 'wip_4662_clean' into next
Reviewed-by: Greg Farnum <greg@inktank.com>
-rw-r--r--configure.ac6
-rw-r--r--src/Makefile.am1
-rw-r--r--src/common/AsyncReserver.h1
-rw-r--r--src/common/tracked_int_ptr.hpp67
-rw-r--r--src/os/FileStore.cc16
-rw-r--r--src/osd/OSD.cc58
-rw-r--r--src/osd/OSD.h79
-rw-r--r--src/osd/OpRequest.cc3
-rw-r--r--src/osd/PG.cc115
-rw-r--r--src/osd/PG.h45
-rw-r--r--src/osd/ReplicatedPG.cc38
-rw-r--r--src/osd/ReplicatedPG.h23
12 files changed, 343 insertions, 109 deletions
diff --git a/configure.ac b/configure.ac
index 23e21133e6a..2661d1c456e 100644
--- a/configure.ac
+++ b/configure.ac
@@ -268,6 +268,12 @@ AS_IF([test "x$with_tcmalloc" != xno],
[no tcmalloc found (use --without-tcmalloc to disable)])])])
AM_CONDITIONAL(WITH_TCMALLOC, [test "$HAVE_LIBTCMALLOC" = "1"])
+#set pg ref debugging?
+AC_ARG_ENABLE([pgrefdebugging],
+ [AS_HELP_STRING([--enable-pgrefdebugging], [enable pg ref debugging])],
+ [AC_DEFINE([PG_DEBUG_REFS], [1], [Defined if you want pg ref debugging])],
+ [])
+
#
# Java is painful
# - adapted from OMPI wrappers package
diff --git a/src/Makefile.am b/src/Makefile.am
index d528b78a1be..cd360fcd147 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -1601,6 +1601,7 @@ noinst_HEADERS = \
common/admin_socket.h \
common/admin_socket_client.h \
common/shared_cache.hpp \
+ common/tracked_int_ptr.hpp \
common/simple_cache.hpp \
common/sharedptr_registry.hpp \
common/map_cacher.hpp \
diff --git a/src/common/AsyncReserver.h b/src/common/AsyncReserver.h
index 638bfb3a1b1..582b3beef7a 100644
--- a/src/common/AsyncReserver.h
+++ b/src/common/AsyncReserver.h
@@ -90,6 +90,7 @@ public:
) {
Mutex::Locker l(lock);
if (queue_pointers.count(item)) {
+ delete queue_pointers[item]->second;
queue.erase(queue_pointers[item]);
queue_pointers.erase(item);
} else {
diff --git a/src/common/tracked_int_ptr.hpp b/src/common/tracked_int_ptr.hpp
new file mode 100644
index 00000000000..ba0900db6bd
--- /dev/null
+++ b/src/common/tracked_int_ptr.hpp
@@ -0,0 +1,67 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2013 Inktank Storage, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef CEPH_TRACKEDINTPTR_H
+#define CEPH_TRACKEDINTPTR_H
+
+#include <map>
+#include <list>
+#include <memory>
+#include <utility>
+#include "common/Mutex.h"
+#include "common/Cond.h"
+
+template <class T>
+class TrackedIntPtr {
+ T *ptr;
+ uint64_t id;
+public:
+ TrackedIntPtr() : ptr(NULL), id(0) {}
+ TrackedIntPtr(T *ptr) : ptr(ptr), id(ptr ? get_with_id(ptr) : 0) {}
+ ~TrackedIntPtr() {
+ if (ptr)
+ put_with_id(ptr, id);
+ else
+ assert(id == 0);
+ }
+ void swap(TrackedIntPtr &other) {
+ T *optr = other.ptr;
+ uint64_t oid = other.id;
+ other.ptr = ptr;
+ other.id = id;
+ ptr = optr;
+ id = oid;
+ }
+ TrackedIntPtr(const TrackedIntPtr &rhs) :
+ ptr(rhs.ptr), id(ptr ? get_with_id(ptr) : 0) {}
+
+ void operator=(const TrackedIntPtr &rhs) {
+ TrackedIntPtr o(rhs.ptr);
+ swap(o);
+ }
+ T &operator*() {
+ return *ptr;
+ }
+ T *operator->() {
+ return ptr;
+ }
+ bool operator<(const TrackedIntPtr &lhs) const {
+ return ptr < lhs.ptr;
+ }
+ bool operator==(const TrackedIntPtr &lhs) const {
+ return ptr == lhs.ptr;
+ }
+};
+
+#endif
diff --git a/src/os/FileStore.cc b/src/os/FileStore.cc
index 5170412183e..16ae21a700c 100644
--- a/src/os/FileStore.cc
+++ b/src/os/FileStore.cc
@@ -3145,7 +3145,7 @@ int FileStore::_do_clone_range(int from, int to, uint64_t srcoff, uint64_t len,
if (err >= 0) {
r += err;
} else {
- return -errno;
+ return err;
}
}
@@ -3158,7 +3158,7 @@ int FileStore::_do_clone_range(int from, int to, uint64_t srcoff, uint64_t len,
if (err >= 0) {
r += err;
} else {
- return -errno;
+ return err;
}
}
dout(20) << "_do_clone_range finished " << srcoff << "~" << len
@@ -3194,10 +3194,14 @@ int FileStore::_do_copy_range(int from, int to, uint64_t srcoff, uint64_t len, u
r = ::read(from, buf, l);
dout(25) << " read from " << pos << "~" << l << " got " << r << dendl;
if (r < 0) {
- r = -errno;
- derr << "FileStore::_do_copy_range: read error at " << pos << "~" << len
- << ", " << cpp_strerror(r) << dendl;
- break;
+ if (errno == EINTR) {
+ continue;
+ } else {
+ r = -errno;
+ derr << "FileStore::_do_copy_range: read error at " << pos << "~" << len
+ << ", " << cpp_strerror(r) << dendl;
+ break;
+ }
}
if (r == 0) {
// hrm, bad source range, wtf.
diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc
index ba502e6112d..5e335422579 100644
--- a/src/osd/OSD.cc
+++ b/src/osd/OSD.cc
@@ -189,6 +189,9 @@ OSDService::OSDService(OSD *osd) :
cur_ratio(0),
is_stopping_lock("OSDService::is_stopping_lock"),
state(NOT_STOPPING)
+#ifdef PG_DEBUG_REFS
+ , pgid_lock("OSDService::pgid_lock")
+#endif
{}
void OSDService::_start_split(const set<pg_t> &pgs)
@@ -1217,7 +1220,7 @@ int OSD::shutdown()
g_ceph_context->_conf->set_val("debug_ms", "100");
g_ceph_context->_conf->apply_changes(NULL);
- // Remove PGs
+ // Shutdown PGs
for (hash_map<pg_t, PG*>::iterator p = pg_map.begin();
p != pg_map.end();
++p) {
@@ -1227,9 +1230,7 @@ int OSD::shutdown()
p->second->kick();
p->second->unlock();
p->second->osr->flush();
- p->second->put();
}
- pg_map.clear();
// finish ops
op_wq.drain(); // should already be empty except for lagard PGs
@@ -1308,6 +1309,28 @@ int OSD::shutdown()
assert(pg_stat_queue.empty());
}
+ peering_wq.clear();
+ // Remove PGs
+#ifdef PG_DEBUG_REFS
+ service.dump_live_pgids();
+#endif
+ for (hash_map<pg_t, PG*>::iterator p = pg_map.begin();
+ p != pg_map.end();
+ ++p) {
+ dout(20) << " kicking pg " << p->first << dendl;
+ p->second->lock();
+ if (p->second->ref.read() != 1) {
+ derr << "pgid " << p->first << " has ref count of "
+ << p->second->ref.read() << dendl;
+ assert(0);
+ }
+ p->second->unlock();
+ p->second->put("PGMap");
+ }
+ pg_map.clear();
+#ifdef PG_DEBUG_REFS
+ service.dump_live_pgids();
+#endif
g_conf->remove_observer(this);
monc->shutdown();
@@ -1321,6 +1344,7 @@ int OSD::shutdown()
cluster_messenger->shutdown();
hbclient_messenger->shutdown();
hbserver_messenger->shutdown();
+ peering_wq.clear();
return r;
}
@@ -1440,7 +1464,7 @@ PG *OSD::_open_lock_pg(
pg->lock_with_map_lock_held(no_lockdep_check);
else
pg->lock(no_lockdep_check);
- pg->get(); // because it's in pg_map
+ pg->get("PGMap"); // because it's in pg_map
return pg;
}
@@ -1467,7 +1491,7 @@ PG* OSD::_make_pg(
void OSD::add_newly_split_pg(PG *pg, PG::RecoveryCtx *rctx)
{
epoch_t e(service.get_osdmap()->get_epoch());
- pg->get(); // For pg_map
+ pg->get("PGMap"); // For pg_map
pg_map[pg->info.pgid] = pg;
dout(10) << "Adding newly split pg " << *pg << dendl;
vector<int> up, acting;
@@ -2981,7 +3005,7 @@ void OSD::send_pg_stats(const utime_t &now)
++p;
if (!pg->is_primary()) { // we hold map_lock; role is stable.
pg->stat_queue_item.remove_myself();
- pg->put();
+ pg->put("pg_stat_queue");
continue;
}
pg->pg_stats_lock.Lock();
@@ -3025,7 +3049,7 @@ void OSD::handle_pg_stats_ack(MPGStatsAck *ack)
xlist<PG*>::iterator p = pg_stat_queue.begin();
while (!p.end()) {
PG *pg = *p;
- pg->get();
+ PGRef _pg(pg);
++p;
if (ack->pg_stat.count(pg->info.pgid)) {
@@ -3034,7 +3058,7 @@ void OSD::handle_pg_stats_ack(MPGStatsAck *ack)
if (acked == pg->pg_stats_stable.reported) {
dout(25) << " ack on " << pg->info.pgid << " " << pg->pg_stats_stable.reported << dendl;
pg->stat_queue_item.remove_myself();
- pg->put();
+ pg->put("pg_stat_queue");
} else {
dout(25) << " still pending " << pg->info.pgid << " " << pg->pg_stats_stable.reported
<< " > acked " << acked << dendl;
@@ -3043,7 +3067,6 @@ void OSD::handle_pg_stats_ack(MPGStatsAck *ack)
} else {
dout(30) << " still pending " << pg->info.pgid << " " << pg->pg_stats_stable.reported << dendl;
}
- pg->put();
}
if (!pg_stat_queue.size()) {
@@ -4455,7 +4478,7 @@ void OSD::consume_map()
dout(7) << "consume_map version " << osdmap->get_epoch() << dendl;
int num_pg_primary = 0, num_pg_replica = 0, num_pg_stray = 0;
- list<PG*> to_remove;
+ list<PGRef> to_remove;
// scan pg's
for (hash_map<pg_t,PG*>::iterator it = pg_map.begin();
@@ -4473,8 +4496,7 @@ void OSD::consume_map()
set<pg_t> split_pgs;
if (!osdmap->have_pg_pool(pg->info.pgid.pool())) {
//pool is deleted!
- pg->get();
- to_remove.push_back(pg);
+ to_remove.push_back(PGRef(pg));
} else if (it->first.is_split(
service.get_osdmap()->get_pg_num(it->first.pool()),
osdmap->get_pg_num(it->first.pool()),
@@ -4485,13 +4507,12 @@ void OSD::consume_map()
pg->unlock();
}
- for (list<PG*>::iterator i = to_remove.begin();
+ for (list<PGRef>::iterator i = to_remove.begin();
i != to_remove.end();
- ++i) {
+ to_remove.erase(i++)) {
(*i)->lock();
- _remove_pg((*i));
+ _remove_pg(&**i);
(*i)->unlock();
- (*i)->put();
}
to_remove.clear();
@@ -5791,10 +5812,9 @@ void OSD::handle_pg_remove(OpRequestRef op)
up, acting);
if (history.same_interval_since <= m->get_epoch()) {
assert(pg->get_primary() == m->get_source().num());
- pg->get();
+ PGRef _pg(pg);
_remove_pg(pg);
pg->unlock();
- pg->put();
} else {
dout(10) << *pg << " ignoring remove request, pg changed in epoch "
<< history.same_interval_since
@@ -5854,7 +5874,7 @@ void OSD::_remove_pg(PG *pg)
// remove from map
pg_map.erase(pg->info.pgid);
- pg->put(); // since we've taken it out of map
+ pg->put("PGMap"); // since we've taken it out of map
}
diff --git a/src/osd/OSD.h b/src/osd/OSD.h
index 5166ae74aa4..8a74cd8b630 100644
--- a/src/osd/OSD.h
+++ b/src/osd/OSD.h
@@ -427,6 +427,41 @@ public:
bool prepare_to_stop();
void got_stop_ack();
+
+#ifdef PG_DEBUG_REFS
+ Mutex pgid_lock;
+ map<pg_t, int> pgid_tracker;
+ map<pg_t, PG*> live_pgs;
+ void add_pgid(pg_t pgid, PG *pg) {
+ Mutex::Locker l(pgid_lock);
+ if (!pgid_tracker.count(pgid)) {
+ pgid_tracker[pgid] = 0;
+ live_pgs[pgid] = pg;
+ }
+ pgid_tracker[pgid]++;
+ }
+ void remove_pgid(pg_t pgid, PG *pg) {
+ Mutex::Locker l(pgid_lock);
+ assert(pgid_tracker.count(pgid));
+ assert(pgid_tracker[pgid] > 0);
+ pgid_tracker[pgid]--;
+ if (pgid_tracker[pgid] == 0) {
+ pgid_tracker.erase(pgid);
+ live_pgs.erase(pgid);
+ }
+ }
+ void dump_live_pgids() {
+ Mutex::Locker l(pgid_lock);
+ derr << "live pgids:" << dendl;
+ for (map<pg_t, int>::iterator i = pgid_tracker.begin();
+ i != pgid_tracker.end();
+ ++i) {
+ derr << "\t" << *i << dendl;
+ live_pgs[i->first]->dump_live_ids();
+ }
+ }
+#endif
+
OSDService(OSD *osd);
};
class OSD : public Dispatcher,
@@ -759,14 +794,14 @@ private:
) {
if (*i == pg) {
peering_queue.erase(i++);
- pg->put();
+ pg->put("PeeringWQ");
} else {
++i;
}
}
}
bool _enqueue(PG *pg) {
- pg->get();
+ pg->get("PeeringWQ");
peering_queue.push_back(pg);
return true;
}
@@ -795,7 +830,7 @@ private:
for (list<PG *>::const_iterator i = pgs.begin();
i != pgs.end();
++i) {
- (*i)->put();
+ (*i)->put("PeeringWQ");
}
}
void _process_finish(const list<PG *> &pgs) {
@@ -1019,7 +1054,7 @@ protected:
void pg_stat_queue_enqueue(PG *pg) {
pg_stat_queue_lock.Lock();
if (pg->is_primary() && !pg->stat_queue_item.is_on_list()) {
- pg->get();
+ pg->get("pg_stat_queue");
pg_stat_queue.push_back(&pg->stat_queue_item);
}
osd_stat_updated = true;
@@ -1028,7 +1063,7 @@ protected:
void pg_stat_queue_dequeue(PG *pg) {
pg_stat_queue_lock.Lock();
if (pg->stat_queue_item.remove_myself())
- pg->put();
+ pg->put("pg_stat_queue");
pg_stat_queue_lock.Unlock();
}
void clear_pg_stat_queue() {
@@ -1036,7 +1071,7 @@ protected:
while (!pg_stat_queue.empty()) {
PG *pg = pg_stat_queue.front();
pg_stat_queue.pop_front();
- pg->put();
+ pg->put("pg_stat_queue");
}
pg_stat_queue_lock.Unlock();
}
@@ -1159,7 +1194,7 @@ protected:
}
bool _enqueue(PG *pg) {
if (!pg->recovery_item.is_on_list()) {
- pg->get();
+ pg->get("RecoveryWQ");
osd->recovery_queue.push_back(&pg->recovery_item);
if (g_conf->osd_recovery_delay_start > 0) {
@@ -1172,7 +1207,7 @@ protected:
}
void _dequeue(PG *pg) {
if (pg->recovery_item.remove_myself())
- pg->put();
+ pg->put("RecoveryWQ");
}
PG *_dequeue() {
if (osd->recovery_queue.empty())
@@ -1187,19 +1222,19 @@ protected:
}
void _queue_front(PG *pg) {
if (!pg->recovery_item.is_on_list()) {
- pg->get();
+ pg->get("RecoveryWQ");
osd->recovery_queue.push_front(&pg->recovery_item);
}
}
void _process(PG *pg) {
osd->do_recovery(pg);
- pg->put();
+ pg->put("RecoveryWQ");
}
void _clear() {
while (!osd->recovery_queue.empty()) {
PG *pg = osd->recovery_queue.front();
osd->recovery_queue.pop_front();
- pg->put();
+ pg->put("RecoveryWQ");
}
}
} recovery_wq;
@@ -1231,13 +1266,13 @@ protected:
bool _enqueue(PG *pg) {
if (pg->snap_trim_item.is_on_list())
return false;
- pg->get();
+ pg->get("SnapTrimWQ");
osd->snap_trim_queue.push_back(&pg->snap_trim_item);
return true;
}
void _dequeue(PG *pg) {
if (pg->snap_trim_item.remove_myself())
- pg->put();
+ pg->put("SnapTrimWQ");
}
PG *_dequeue() {
if (osd->snap_trim_queue.empty())
@@ -1248,7 +1283,7 @@ protected:
}
void _process(PG *pg) {
pg->snap_trimmer();
- pg->put();
+ pg->put("SnapTrimWQ");
}
void _clear() {
osd->snap_trim_queue.clear();
@@ -1275,13 +1310,13 @@ protected:
if (pg->scrub_item.is_on_list()) {
return false;
}
- pg->get();
+ pg->get("ScrubWQ");
osd->scrub_queue.push_back(&pg->scrub_item);
return true;
}
void _dequeue(PG *pg) {
if (pg->scrub_item.remove_myself()) {
- pg->put();
+ pg->put("ScrubWQ");
}
}
PG *_dequeue() {
@@ -1293,13 +1328,13 @@ protected:
}
void _process(PG *pg) {
pg->scrub();
- pg->put();
+ pg->put("ScrubWQ");
}
void _clear() {
while (!osd->scrub_queue.empty()) {
PG *pg = osd->scrub_queue.front();
osd->scrub_queue.pop_front();
- pg->put();
+ pg->put("ScrubWQ");
}
}
} scrub_wq;
@@ -1320,13 +1355,13 @@ protected:
if (pg->scrub_finalize_item.is_on_list()) {
return false;
}
- pg->get();
+ pg->get("ScrubFinalizeWQ");
scrub_finalize_queue.push_back(&pg->scrub_finalize_item);
return true;
}
void _dequeue(PG *pg) {
if (pg->scrub_finalize_item.remove_myself()) {
- pg->put();
+ pg->put("ScrubFinalizeWQ");
}
}
PG *_dequeue() {
@@ -1338,13 +1373,13 @@ protected:
}
void _process(PG *pg) {
pg->scrub_finalize();
- pg->put();
+ pg->put("ScrubFinalizeWQ");
}
void _clear() {
while (!scrub_finalize_queue.empty()) {
PG *pg = scrub_finalize_queue.front();
scrub_finalize_queue.pop_front();
- pg->put();
+ pg->put("ScrubFinalizeWQ");
}
}
} scrub_finalize_wq;
diff --git a/src/osd/OpRequest.cc b/src/osd/OpRequest.cc
index d5ce8bbc749..ea9beaacda3 100644
--- a/src/osd/OpRequest.cc
+++ b/src/osd/OpRequest.cc
@@ -29,7 +29,8 @@ void OpHistory::on_shutdown()
void OpHistory::insert(utime_t now, OpRequestRef op)
{
- assert(!shutdown);
+ if (shutdown)
+ return;
duration.insert(make_pair(op->get_duration(), op));
arrived.insert(make_pair(op->get_arrived(), op));
cleanup(now);
diff --git a/src/osd/PG.cc b/src/osd/PG.cc
index f21882966da..6f157eb6680 100644
--- a/src/osd/PG.cc
+++ b/src/osd/PG.cc
@@ -33,6 +33,7 @@
#include "messages/MOSDSubOp.h"
#include "messages/MOSDSubOpReply.h"
+#include "common/BackTrace.h"
#include <sstream>
@@ -43,6 +44,73 @@ static ostream& _prefix(std::ostream *_dout, const PG *pg) {
return *_dout << pg->gen_prefix();
}
+void PG::get(const string &tag) {
+ ref.inc();
+#ifdef PG_DEBUG_REFS
+ Mutex::Locker l(_ref_id_lock);
+ if (!_tag_counts.count(tag)) {
+ _tag_counts[tag] = 0;
+ }
+ _tag_counts[tag]++;
+#endif
+}
+void PG::put(const string &tag) {
+#ifdef PG_DEBUG_REFS
+ {
+ Mutex::Locker l(_ref_id_lock);
+ assert(_tag_counts.count(tag));
+ _tag_counts[tag]--;
+ if (_tag_counts[tag] == 0) {
+ _tag_counts.erase(tag);
+ }
+ }
+#endif
+ if (ref.dec() == 0)
+ delete this;
+}
+
+#ifdef PG_DEBUG_REFS
+uint64_t PG::get_with_id() {
+ ref.inc();
+ Mutex::Locker l(_ref_id_lock);
+ uint64_t id = ++_ref_id;
+ BackTrace bt(0);
+ stringstream ss;
+ bt.print(ss);
+ dout(20) << __func__ << ": " << info.pgid << " got id " << id << dendl;
+ assert(!_live_ids.count(id));
+ _live_ids.insert(make_pair(id, ss.str()));
+ return id;
+}
+
+void PG::put_with_id(uint64_t id) {
+ dout(20) << __func__ << ": " << info.pgid << " put id " << id << dendl;
+ {
+ Mutex::Locker l(_ref_id_lock);
+ assert(_live_ids.count(id));
+ _live_ids.erase(id);
+ }
+ if (ref.dec() == 0)
+ delete this;
+}
+
+void PG::dump_live_ids() {
+ Mutex::Locker l(_ref_id_lock);
+ dout(0) << "\t" << __func__ << ": " << info.pgid << " live ids:" << dendl;
+ for (map<uint64_t, string>::iterator i = _live_ids.begin();
+ i != _live_ids.end();
+ ++i) {
+ dout(0) << "\t\tid: " << *i << dendl;
+ }
+ dout(0) << "\t" << __func__ << ": " << info.pgid << " live tags:" << dendl;
+ for (map<string, uint64_t>::iterator i = _tag_counts.begin();
+ i != _tag_counts.end();
+ ++i) {
+ dout(0) << "\t\tid: " << *i << dendl;
+ }
+}
+#endif
+
void PGPool::update(OSDMapRef map)
{
const pg_pool_t *pi = map->get_pg_pool(id);
@@ -72,7 +140,11 @@ PG::PG(OSDService *o, OSDMapRef curmap,
_pool.id),
osdmap_ref(curmap), pool(_pool),
_lock("PG::_lock"),
- ref(0), deleting(false), dirty_info(false), dirty_big_info(false), dirty_log(false),
+ ref(0),
+ #ifdef PG_DEBUG_REFS
+ _ref_id_lock("PG::_ref_id_lock"), _ref_id(0),
+ #endif
+ deleting(false), dirty_info(false), dirty_big_info(false), dirty_log(false),
info(p),
info_struct_v(0),
coll(p), log_oid(loid), biginfo_oid(ioid),
@@ -98,10 +170,16 @@ PG::PG(OSDService *o, OSDMapRef curmap,
active_pushes(0),
recovery_state(this)
{
+#ifdef PG_DEBUG_REFS
+ osd->add_pgid(p, this);
+#endif
}
PG::~PG()
{
+#ifdef PG_DEBUG_REFS
+ osd->remove_pgid(info.pgid, this);
+#endif
}
void PG::lock(bool no_lockdep)
@@ -1381,7 +1459,7 @@ void PG::build_might_have_unfound()
}
struct C_PG_ActivateCommitted : public Context {
- PG *pg;
+ PGRef pg;
epoch_t epoch;
C_PG_ActivateCommitted(PG *p, epoch_t e)
: pg(p), epoch(e) {}
@@ -1452,7 +1530,6 @@ void PG::activate(ObjectStore::Transaction& t,
clean_up_local(t);
// find out when we commit
- get(); // for callback
tfin.push_back(new C_PG_ActivateCommitted(this, query_epoch));
// initialize snap_trimq
@@ -1803,7 +1880,6 @@ void PG::_activate_committed(epoch_t e)
}
unlock();
- put();
}
/*
@@ -1860,10 +1936,8 @@ bool PG::queue_scrub()
}
struct C_PG_FinishRecovery : public Context {
- PG *pg;
- C_PG_FinishRecovery(PG *p) : pg(p) {
- pg->get();
- }
+ PGRef pg;
+ C_PG_FinishRecovery(PG *p) : pg(p) {}
void finish(int r) {
pg->_finish_recovery(this);
}
@@ -1906,6 +1980,10 @@ void PG::finish_recovery(list<Context*>& tfin)
void PG::_finish_recovery(Context *c)
{
lock();
+ if (deleting) {
+ unlock();
+ return;
+ }
if (c == finish_sync_event) {
dout(10) << "_finish_recovery" << dendl;
finish_sync_event = 0;
@@ -1922,7 +2000,6 @@ void PG::_finish_recovery(Context *c)
dout(10) << "_finish_recovery -- stale" << dendl;
}
unlock();
- put();
}
void PG::start_recovery_op(const hobject_t& soid)
@@ -3689,7 +3766,6 @@ void PG::scrub()
lock();
if (deleting) {
unlock();
- put();
return;
}
@@ -4820,16 +4896,14 @@ void PG::set_last_peering_reset()
}
struct FlushState {
- PG *pg;
+ PGRef pg;
epoch_t epoch;
- FlushState(PG *pg, epoch_t epoch) : pg(pg), epoch(epoch) {
- pg->get();
- }
+ FlushState(PG *pg, epoch_t epoch) : pg(pg), epoch(epoch) {}
~FlushState() {
pg->lock();
- pg->queue_flushed(epoch);
+ if (!pg->pg_has_reset_since(epoch))
+ pg->queue_flushed(epoch);
pg->unlock();
- pg->put();
}
};
typedef std::tr1::shared_ptr<FlushState> FlushStateRef;
@@ -7639,5 +7713,10 @@ bool PG::PriorSet::affected_by_map(const OSDMapRef osdmap, const PG *debug_pg) c
return false;
}
-void intrusive_ptr_add_ref(PG *pg) { pg->get(); }
-void intrusive_ptr_release(PG *pg) { pg->put(); }
+void intrusive_ptr_add_ref(PG *pg) { pg->get("intptr"); }
+void intrusive_ptr_release(PG *pg) { pg->put("intptr"); }
+
+#ifdef PG_DEBUG_REFS
+ uint64_t get_with_id(PG *pg) { return pg->get_with_id(); }
+ void put_with_id(PG *pg, uint64_t id) { return pg->put_with_id(id); }
+#endif
diff --git a/src/osd/PG.h b/src/osd/PG.h
index f437bafbb16..3ac51b87b94 100644
--- a/src/osd/PG.h
+++ b/src/osd/PG.h
@@ -42,6 +42,7 @@
#include "msg/Messenger.h"
#include "messages/MOSDRepScrub.h"
#include "messages/MOSDPGLog.h"
+#include "common/tracked_int_ptr.hpp"
#include <list>
#include <memory>
@@ -64,6 +65,18 @@ class MOSDPGScan;
class MOSDPGBackfill;
class MOSDPGInfo;
+class PG;
+
+void intrusive_ptr_add_ref(PG *pg);
+void intrusive_ptr_release(PG *pg);
+
+#ifdef PG_DEBUG_REFS
+ uint64_t get_with_id(PG *pg);
+ void put_with_id(PG *pg, uint64_t id);
+ typedef TrackedIntPtr<PG> PGRef;
+#else
+ typedef boost::intrusive_ptr<PG> PGRef;
+#endif
struct PGRecoveryStats {
struct per_state_info {
@@ -388,6 +401,13 @@ protected:
Cond _cond;
atomic_t ref;
+#ifdef PG_DEBUG_REFS
+ Mutex _ref_id_lock;
+ map<uint64_t, string> _live_ids;
+ map<string, uint64_t> _tag_counts;
+ uint64_t _ref_id;
+#endif
+
public:
bool deleting; // true while in removing or OSD is shutting down
@@ -422,17 +442,13 @@ public:
_cond.Signal();
}
- void get() {
- //generic_dout(0) << this << " " << info.pgid << " get " << ref.test() << dendl;
- //assert(_lock.is_locked());
- ref.inc();
- }
- void put() {
- //generic_dout(0) << this << " " << info.pgid << " put " << ref.test() << dendl;
- if (ref.dec() == 0)
- delete this;
- }
-
+#ifdef PG_DEBUG_REFS
+ uint64_t get_with_id();
+ void put_with_id(uint64_t);
+ void dump_live_ids();
+#endif
+ void get(const string &tag);
+ void put(const string &tag);
bool dirty_info, dirty_big_info, dirty_log;
@@ -1057,7 +1073,7 @@ public:
template <class EVT>
struct QueuePeeringEvt : Context {
- boost::intrusive_ptr<PG> pg;
+ PGRef pg;
epoch_t epoch;
EVT evt;
QueuePeeringEvt(PG *pg, epoch_t epoch, EVT evt) :
@@ -1967,9 +1983,4 @@ WRITE_CLASS_ENCODER(PG::OndiskLog)
ostream& operator<<(ostream& out, const PG& pg);
-void intrusive_ptr_add_ref(PG *pg);
-void intrusive_ptr_release(PG *pg);
-
-typedef boost::intrusive_ptr<PG> PGRef;
-
#endif
diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc
index 50a747e330c..90f6bf5c18e 100644
--- a/src/osd/ReplicatedPG.cc
+++ b/src/osd/ReplicatedPG.cc
@@ -3696,33 +3696,29 @@ int ReplicatedPG::prepare_transaction(OpContext *ctx)
class C_OSD_OpApplied : public Context {
public:
- ReplicatedPG *pg;
+ ReplicatedPGRef pg;
ReplicatedPG::RepGather *repop;
C_OSD_OpApplied(ReplicatedPG *p, ReplicatedPG::RepGather *rg) :
pg(p), repop(rg) {
repop->get();
- pg->get(); // we're copying the pointer
}
void finish(int r) {
pg->op_applied(repop);
- pg->put();
}
};
class C_OSD_OpCommit : public Context {
public:
- ReplicatedPG *pg;
+ ReplicatedPGRef pg;
ReplicatedPG::RepGather *repop;
C_OSD_OpCommit(ReplicatedPG *p, ReplicatedPG::RepGather *rg) :
pg(p), repop(rg) {
repop->get();
- pg->get(); // we're copying the pointer
}
void finish(int r) {
pg->op_commit(repop);
- pg->put();
}
};
@@ -4631,7 +4627,7 @@ void ReplicatedPG::sub_op_modify(OpRequestRef op)
RepModify *rm = new RepModify;
rm->pg = this;
- get();
+ get("RepModify");
rm->op = op;
rm->ctx = 0;
rm->ackerosd = ackerosd;
@@ -4764,7 +4760,7 @@ void ReplicatedPG::sub_op_modify_applied(RepModify *rm)
if (done) {
delete rm->ctx;
delete rm;
- put();
+ put("RepModify");
}
}
@@ -4799,7 +4795,7 @@ void ReplicatedPG::sub_op_modify_commit(RepModify *rm)
if (done) {
delete rm->ctx;
delete rm;
- put();
+ put("RepModify");
}
}
@@ -5845,7 +5841,8 @@ void ReplicatedPG::_applied_recovered_object(ObjectStore::Transaction *t, Object
--active_pushes;
// requeue an active chunky scrub waiting on recovery ops
- if (active_pushes == 0 && scrubber.is_chunky_scrub_active()) {
+ if (!deleting && active_pushes == 0
+ && scrubber.is_chunky_scrub_active()) {
osd->scrub_wq.queue(this);
}
@@ -5862,7 +5859,7 @@ void ReplicatedPG::_applied_recovered_object_replica(ObjectStore::Transaction *t
--active_pushes;
// requeue an active chunky scrub waiting on recovery ops
- if (active_pushes == 0 &&
+ if (!deleting && active_pushes == 0 &&
scrubber.active_rep_scrub && scrubber.active_rep_scrub->chunky) {
osd->rep_scrub_wq.queue(scrubber.active_rep_scrub);
scrubber.active_rep_scrub = 0;
@@ -6069,14 +6066,11 @@ ObjectContext *ReplicatedPG::mark_object_lost(ObjectStore::Transaction *t,
}
struct C_PG_MarkUnfoundLost : public Context {
- ReplicatedPG *pg;
+ ReplicatedPGRef pg;
list<ObjectContext*> obcs;
- C_PG_MarkUnfoundLost(ReplicatedPG *p) : pg(p) {
- pg->get();
- }
+ C_PG_MarkUnfoundLost(ReplicatedPG *p) : pg(p) {}
void finish(int r) {
pg->_finish_mark_all_unfound_lost(obcs);
- pg->put();
}
};
@@ -6186,7 +6180,8 @@ void ReplicatedPG::_finish_mark_all_unfound_lost(list<ObjectContext*>& obcs)
lock();
dout(10) << "_finish_mark_all_unfound_lost " << dendl;
- requeue_ops(waiting_for_all_missing);
+ if (!deleting)
+ requeue_ops(waiting_for_all_missing);
waiting_for_all_missing.clear();
while (!obcs.empty()) {
@@ -7456,5 +7451,10 @@ boost::statechart::result ReplicatedPG::WaitingOnReplicas::react(const SnapTrim&
return transit< NotTrimming >();
}
-void intrusive_ptr_add_ref(ReplicatedPG *pg) { pg->get(); }
-void intrusive_ptr_release(ReplicatedPG *pg) { pg->put(); }
+void intrusive_ptr_add_ref(ReplicatedPG *pg) { pg->get("intptr"); }
+void intrusive_ptr_release(ReplicatedPG *pg) { pg->put("intptr"); }
+
+#ifdef PG_DEBUG_REFS
+uint64_t get_with_id(ReplicatedPG *pg) { return pg->get_with_id(); }
+void put_with_id(ReplicatedPG *pg, uint64_t id) { return pg->put_with_id(id); }
+#endif
diff --git a/src/osd/ReplicatedPG.h b/src/osd/ReplicatedPG.h
index 377ca8656a7..2ef2627ba70 100644
--- a/src/osd/ReplicatedPG.h
+++ b/src/osd/ReplicatedPG.h
@@ -28,6 +28,18 @@
#include "messages/MOSDSubOp.h"
class MOSDSubOpReply;
+class ReplicatedPG;
+void intrusive_ptr_add_ref(ReplicatedPG *pg);
+void intrusive_ptr_release(ReplicatedPG *pg);
+uint64_t get_with_id(ReplicatedPG *pg);
+void put_with_id(ReplicatedPG *pg, uint64_t id);
+
+#ifdef PG_DEBUG_REFS
+ typedef TrackedIntPtr<ReplicatedPG> ReplicatedPGRef;
+#else
+ typedef boost::intrusive_ptr<ReplicatedPG> ReplicatedPGRef;
+#endif
+
class PGLSFilter {
protected:
string xattr;
@@ -783,7 +795,7 @@ protected:
}
};
struct C_OSD_AppliedRecoveredObject : public Context {
- boost::intrusive_ptr<ReplicatedPG> pg;
+ ReplicatedPGRef pg;
ObjectStore::Transaction *t;
ObjectContext *obc;
C_OSD_AppliedRecoveredObject(ReplicatedPG *p, ObjectStore::Transaction *tt, ObjectContext *o) :
@@ -793,7 +805,7 @@ protected:
}
};
struct C_OSD_CommittedPushedObject : public Context {
- boost::intrusive_ptr<ReplicatedPG> pg;
+ ReplicatedPGRef pg;
OpRequestRef op;
epoch_t epoch;
eversion_t last_complete;
@@ -818,7 +830,7 @@ protected:
}
};
struct C_OSD_CompletedPull : public Context {
- boost::intrusive_ptr<ReplicatedPG> pg;
+ ReplicatedPGRef pg;
hobject_t hoid;
epoch_t epoch;
C_OSD_CompletedPull(
@@ -835,7 +847,7 @@ protected:
};
friend class C_OSD_CompletedPull;
struct C_OSD_AppliedRecoveredObjectReplica : public Context {
- boost::intrusive_ptr<ReplicatedPG> pg;
+ ReplicatedPGRef pg;
ObjectStore::Transaction *t;
C_OSD_AppliedRecoveredObjectReplica(ReplicatedPG *p, ObjectStore::Transaction *tt) :
pg(p), t(tt) {}
@@ -1021,7 +1033,4 @@ inline ostream& operator<<(ostream& out, ReplicatedPG::AccessMode& mode)
return out;
}
-void intrusive_ptr_add_ref(ReplicatedPG *pg);
-void intrusive_ptr_release(ReplicatedPG *pg);
-
#endif