From f03145f800b4e13ca5bc168855e0dbb62994c73a Mon Sep 17 00:00:00 2001 From: Alan Conway Date: Wed, 15 May 2013 21:53:55 +0000 Subject: QPID-4850: HA Sporadic stall in ha_tests.test_failover_send_receive Fixed a deadlock in the test: test was killing a backup broker then waiting to pass a checkpoint: however if the backup was expected by the primary, the primary holds queues till it connects, so we deadlock. git-svn-id: https://svn.apache.org/repos/asf/qpid/trunk/qpid@1483107 13f79535-47bb-0310-9956-ffa450edef68 --- cpp/src/qpid/ha/QueueReplicator.cpp | 1 + cpp/src/qpid/ha/ReplicatingSubscription.h | 4 ++-- cpp/src/tests/ha_tests.py | 8 +------- 3 files changed, 4 insertions(+), 9 deletions(-) (limited to 'cpp/src') diff --git a/cpp/src/qpid/ha/QueueReplicator.cpp b/cpp/src/qpid/ha/QueueReplicator.cpp index 3580c49826..946831319c 100644 --- a/cpp/src/qpid/ha/QueueReplicator.cpp +++ b/cpp/src/qpid/ha/QueueReplicator.cpp @@ -162,6 +162,7 @@ QueueReplicator::~QueueReplicator() {} // Called from Queue::destroyed() void QueueReplicator::destroy() { + QPID_LOG(debug, logPrefix << " destroyed"); boost::shared_ptr<Bridge> bridge2; // To call outside of lock { Mutex::ScopedLock l(lock); diff --git a/cpp/src/qpid/ha/ReplicatingSubscription.h b/cpp/src/qpid/ha/ReplicatingSubscription.h index 7fcb4ccf13..2780f4fd00 100644 --- a/cpp/src/qpid/ha/ReplicatingSubscription.h +++ b/cpp/src/qpid/ha/ReplicatingSubscription.h @@ -61,8 +61,8 @@ class QueueGuard; * * Lifecycle: broker::Queue holds shared_ptrs to this as a consumer. * - * Lock Hierarchy: ReplicatingSubscription MUS NOT call QueueGuard with it's lock held - * QueueGuard MAY call ReplicatingSubscription with it's lock held. + * Lock Hierarchy: ReplicatingSubscription MUST NOT call QueueGuard with its + * lock held QueueGuard MAY call ReplicatingSubscription with its lock held. 
*/ class ReplicatingSubscription : public broker::SemanticState::ConsumerImpl { diff --git a/cpp/src/tests/ha_tests.py b/cpp/src/tests/ha_tests.py index 3f870ba5b8..8b4ff80dd0 100755 --- a/cpp/src/tests/ha_tests.py +++ b/cpp/src/tests/ha_tests.py @@ -972,19 +972,13 @@ class LongTests(HaBrokerTest): brokers.bounce(victim) # Next one is promoted primary = next else: - brokers.kill(victim, promote_next=False, final=False) - dead = victim + brokers.bounce(victim, promote_next=False) # Make sure we are not stalled map(wait_passed, receivers, checkpoint) # Run another checkpoint to ensure things work in this configuration checkpoint = [ r.received+100 for r in receivers ] map(wait_passed, receivers, checkpoint) - - if dead is not None: - brokers.restart(dead) # Restart backup - brokers[dead].ready() - dead = None i += 1 except: traceback.print_exc() -- cgit v1.2.1