author | Stephen D. Huston <shuston@apache.org> | 2011-10-21 14:42:12 +0000
---|---|---
committer | Stephen D. Huston <shuston@apache.org> | 2011-10-21 14:42:12 +0000
commit | f83677056891e436bf5ba99e79240df2a44528cd (patch)
tree | 625bfd644b948e89105630759cf6decb0435354d /cpp/src/tests/cluster_tests.py
parent | ebfd9ff053b04ab379acfc0fefedee5a31b6d8a5 (diff)
download | qpid-python-QPID-2519.tar.gz
Merged out from trunk (QPID-2519)
git-svn-id: https://svn.apache.org/repos/asf/qpid/branches/QPID-2519@1187375 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'cpp/src/tests/cluster_tests.py')
-rwxr-xr-x | cpp/src/tests/cluster_tests.py | 1033
1 file changed, 987 insertions, 46 deletions
diff --git a/cpp/src/tests/cluster_tests.py b/cpp/src/tests/cluster_tests.py
index cbad4010b4..0e80e06d34 100755
--- a/cpp/src/tests/cluster_tests.py
+++ b/cpp/src/tests/cluster_tests.py
@@ -18,12 +18,13 @@
 # under the License.
 #
-import os, signal, sys, time, imp, re, subprocess, glob, cluster_test_logs
+import os, signal, sys, time, imp, re, subprocess, glob, random, logging
+import cluster_test_logs
 from qpid import datatypes, messaging
 from brokertest import *
 from qpid.harness import Skipped
-from qpid.messaging import Message, Empty
-from threading import Thread, Lock
+from qpid.messaging import Message, Empty, Disposition, REJECTED, util
+from threading import Thread, Lock, Condition
 from logging import getLogger
 from itertools import chain
 from tempfile import NamedTemporaryFile
@@ -96,9 +97,15 @@ class ShortTests(BrokerTest):
             destination="amq.direct",
             message=qpid.datatypes.Message(props, "content"))
 
+        # Try message with TTL and different headers/properties
+        cluster[0].send_message("q", Message(durable=True, ttl=100000))
+        cluster[0].send_message("q", Message(durable=True, properties={}, ttl=100000))
+        cluster[0].send_message("q", Message(durable=True, properties={"x":10}, ttl=100000))
+
         # Now update a new member and compare their dumps.
         cluster.start(args=["--test-store-dump", "updatee.dump"])
         assert readfile("direct.dump") == readfile("updatee.dump")
+        os.remove("direct.dump")
         os.remove("updatee.dump")
 
@@ -108,19 +115,22 @@
         acl=os.path.join(os.getcwd(), "policy.acl")
         aclf=file(acl,"w")
         aclf.write("""
-acl deny zag@QPID create queue
-acl allow all all
+acl allow zig@QPID all all
+acl deny all all
 """)
         aclf.close()
-        cluster = self.cluster(2, args=["--auth", "yes",
+        cluster = self.cluster(1, args=["--auth", "yes",
                                         "--sasl-config", sasl_config,
                                         "--load-module", os.getenv("ACL_LIB"),
                                         "--acl-file", acl])
 
         # Valid user/password, ensure queue is created.
         c = cluster[0].connect(username="zig", password="zig")
-        c.session().sender("ziggy;{create:always}")
+        c.session().sender("ziggy;{create:always,node:{x-declare:{exclusive:true}}}")
         c.close()
+        cluster.start() # Start second node.
+
+        # Check queue is created on second node.
         c = cluster[1].connect(username="zig", password="zig")
         c.session().receiver("ziggy;{assert:always}")
         c.close()
@@ -149,7 +159,7 @@ acl allow all all
             self.fail("Expected exception")
         except messaging.exceptions.UnauthorizedAccess: pass
         # make sure the queue was not created at the other node.
-        c = cluster[0].connect(username="zag", password="zag")
+        c = cluster[1].connect(username="zig", password="zig")
         try:
             s = c.session()
             s.sender("zaggy;{assert:always}")
@@ -157,6 +167,35 @@ acl allow all all
             self.fail("Expected exception")
         except messaging.exceptions.NotFound: pass
 
+    def test_sasl_join(self):
+        """Verify SASL authentication between brokers when joining a cluster."""
+        sasl_config=os.path.join(self.rootdir, "sasl_config")
+        # Test with a valid username/password
+        cluster = self.cluster(1, args=["--auth", "yes",
+                                        "--sasl-config", sasl_config,
+                                        "--load-module", os.getenv("ACL_LIB"),
+                                        "--cluster-username=zig",
+                                        "--cluster-password=zig",
+                                        "--cluster-mechanism=PLAIN"
+                                        ])
+        cluster.start()
+        cluster.ready()
+        c = cluster[1].connect(username="zag", password="zag")
+
+        # Test with an invalid username/password
+        cluster = self.cluster(1, args=["--auth", "yes",
+                                        "--sasl-config", sasl_config,
+                                        "--load-module", os.getenv("ACL_LIB"),
+                                        "--cluster-username=x",
+                                        "--cluster-password=y",
+                                        "--cluster-mechanism=PLAIN"
+                                        ])
+        try:
+            cluster.start(expect=EXPECT_EXIT_OK)
+            cluster[1].ready()
+            self.fail("Expected exception")
+        except: pass
+
     def test_user_id_update(self):
         """Ensure that user-id of an open session is updated to new cluster members"""
         sasl_config=os.path.join(self.rootdir, "sasl_config")
@@ -246,25 +285,6 @@ acl allow all all
         session1 = cluster[1].connect().session()
         for q in queues: self.assert_browse(session1, "q1", ["foo"])
 
-    def test_dr_no_message(self):
-        """Regression test for https://bugzilla.redhat.com/show_bug.cgi?id=655141
-        Joining broker crashes with 'error deliveryRecord no update message'
-        """
-
-        cluster = self.cluster(1)
-        session0 = cluster[0].connect().session()
-        s = session0.sender("q1;{create:always}")
-        s.send(Message("a", ttl=0.05), sync=False)
-        s.send(Message("b", ttl=0.05), sync=False)
-        r1 = session0.receiver("q1")
-        self.assertEqual("a", r1.fetch(timeout=0).content)
-        r2 = session0.receiver("q1;{mode:browse}")
-        self.assertEqual("b", r2.fetch(timeout=0).content)
-        # Leave messages un-acknowledged, let the expire, then start new broker.
-        time.sleep(.1)
-        cluster.start()
-        self.assertRaises(Empty, cluster[1].connect().session().receiver("q1").fetch,0)
-
     def test_route_update(self):
         """Regression test for https://issues.apache.org/jira/browse/QPID-2982
         Links and bridges associated with routes were not replicated on update.
        client was attached.
        """
         args=["--mgmt-pub-interval=1","--log-enable=trace+:management"]
+        # First broker will be killed.
         cluster0 = self.cluster(1, args=args)
         cluster1 = self.cluster(1, args=args)
         assert 0 == subprocess.call(
@@ -301,9 +322,695 @@ acl allow all all
         qpid_tool.wait()
         scanner.join()
         assert scanner.found
+
+        # Regression test for https://issues.apache.org/jira/browse/QPID-3235
+        # Inconsistent stats when changing elder.
+
+        # Force a change of elder
+        cluster0.start()
+        cluster0[0].expect=EXPECT_EXIT_FAIL # About to die.
+        cluster0[0].kill()
+        time.sleep(2) # Allow a management interval to pass.
         # Verify logs are consistent
         cluster_test_logs.verify_logs()
 
+    def test_redelivered(self):
+        """Verify that redelivered flag is set correctly on replayed messages"""
+        cluster = self.cluster(2, expect=EXPECT_EXIT_FAIL)
+        url = "amqp:tcp:%s,tcp:%s" % (cluster[0].host_port(), cluster[1].host_port())
+        queue = "my-queue"
+        cluster[0].declare_queue(queue)
+        self.sender = self.popen(
+            ["qpid-send",
+             "--broker", url,
+             "--address", queue,
+             "--sequence=true",
+             "--send-eos=1",
+             "--messages=100000",
+             "--connection-options={%s}"%(Cluster.CONNECTION_OPTIONS)
+             ])
+        self.receiver = self.popen(
+            ["qpid-receive",
+             "--broker", url,
+             "--address", queue,
+             "--ignore-duplicates",
+             "--check-redelivered",
+             "--connection-options={%s}"%(Cluster.CONNECTION_OPTIONS),
+             "--forever"
+             ])
+        time.sleep(1) # Give the sender enough time to have some messages to replay.
+        cluster[0].kill()
+        self.sender.wait()
+        self.receiver.wait()
+        cluster[1].kill()
+
+    class BlockedSend(Thread):
+        """Send a message; the send is expected to block.
+        Verify that it does block (for a given timeout), then allow
+        waiting till it unblocks when it is expected to do so."""
+        def __init__(self, sender, msg):
+            self.sender, self.msg = sender, msg
+            self.blocked = True
+            self.condition = Condition()
+            self.timeout = 0.1 # Time to wait for expected results.
+            Thread.__init__(self)
+        def run(self):
+            try:
+                self.sender.send(self.msg, sync=True)
+                self.condition.acquire()
+                try:
+                    self.blocked = False
+                    self.condition.notify()
+                finally: self.condition.release()
+            except Exception,e: print "BlockedSend exception: %s"%e
+        def start(self):
+            Thread.start(self)
+            time.sleep(self.timeout)
+            assert self.blocked # Expected to block
+        def assert_blocked(self): assert self.blocked
+        def wait(self): # Now expecting to unblock
+            self.condition.acquire()
+            try:
+                while self.blocked:
+                    self.condition.wait(self.timeout)
+                    if self.blocked: raise Exception("Timed out waiting for send to unblock")
+            finally: self.condition.release()
+            self.join()
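For orientation, the per-queue flow-control arguments exercised below can also be tripped from a plain qpid.messaging client, outside the test harness. A minimal sketch, assuming a broker on localhost:5672 (the address string mirrors the one the tests use; nothing here is part of the commit itself):

    from qpid.messaging import Connection, Message

    # Hypothetical standalone producer: once qpid.flow_stop_count is
    # exceeded, a synchronous send blocks until the queue drains to
    # qpid.flow_resume_count.
    conn = Connection("localhost:5672")
    conn.open()
    try:
        snd = conn.session().sender(
            "flq; {create:always, node:{type:queue, x-declare:{arguments:"
            "{'qpid.flow_stop_count':5, 'qpid.flow_resume_count':3}}}}")
        for i in range(5):
            snd.send(Message(str(i)))  # fills the queue to the stop threshold
        # A sixth send(..., sync=True) would now block until a consumer
        # drains the queue below the resume threshold.
    finally:
        conn.close()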
+ """ + # configure a queue with a specific flow limit on first broker + ssn0 = brokers.first().connect().session() + s0 = ssn0.sender("flq; {create:always, node:{type:queue, x-declare:{arguments:{'qpid.flow_stop_count':5, 'qpid.flow_resume_count':3}}}}") + brokers.first().startQmf() + q1 = [q for q in brokers.first().qmf_session.getObjects(_class="queue") if q.name == "flq"][0] + oid = q1.getObjectId() + self.assertEqual(q1.name, "flq") + self.assertEqual(q1.arguments, {u'qpid.flow_stop_count': 5L, u'qpid.flow_resume_count': 3L}) + assert not q1.flowStopped + self.assertEqual(q1.flowStoppedCount, 0) + + # fill the queue on one broker until flow control is active + for x in range(5): s0.send(Message(str(x))) + sender = ShortTests.BlockedSend(s0, Message(str(6))) + sender.start() # Tests that sender does block + # Verify the broker queue goes into a flowStopped state + deadline = time.time() + 1 + while not q1.flowStopped and time.time() < deadline: q1.update() + assert q1.flowStopped + self.assertEqual(q1.flowStoppedCount, 1) + sender.assert_blocked() # Still blocked + + # Now verify the both brokers in cluster have same configuration + brokers.second().startQmf() + qs = brokers.second().qmf_session.getObjects(_objectId=oid) + self.assertEqual(len(qs), 1) + q2 = qs[0] + self.assertEqual(q2.name, "flq") + self.assertEqual(q2.arguments, {u'qpid.flow_stop_count': 5L, u'qpid.flow_resume_count': 3L}) + assert q2.flowStopped + self.assertEqual(q2.flowStoppedCount, 1) + + # now drain the queue using a session to the other broker + ssn1 = brokers.second().connect().session() + r1 = ssn1.receiver("flq", capacity=6) + for x in range(4): + r1.fetch(timeout=0) + ssn1.acknowledge() + sender.wait() # Verify no longer blocked. + + # and re-verify state of queue on both brokers + q1.update() + assert not q1.flowStopped + q2.update() + assert not q2.flowStopped + + ssn0.connection.close() + ssn1.connection.close() + cluster_test_logs.verify_logs() + + def test_queue_flowlimit(self): + """Test flow limits on a standalone broker""" + broker = self.broker() + class Brokers: + def first(self): return broker + def second(self): return broker + self.queue_flowlimit_test(Brokers()) + + def test_queue_flowlimit_cluster(self): + cluster = self.cluster(2) + class Brokers: + def first(self): return cluster[0] + def second(self): return cluster[1] + self.queue_flowlimit_test(Brokers()) + + def test_queue_flowlimit_cluster_join(self): + cluster = self.cluster(1) + class Brokers: + def first(self): return cluster[0] + def second(self): + if len(cluster) == 1: cluster.start() + return cluster[1] + self.queue_flowlimit_test(Brokers()) + + def test_queue_flowlimit_replicate(self): + """ Verify that a queue which is in flow control BUT has drained BELOW + the flow control 'stop' threshold, is correctly replicated when a new + broker is added to the cluster. + """ + + class AsyncSender(Thread): + """Send a fixed number of msgs from a sender in a separate thread + so it may block without blocking the test. 
+ """ + def __init__(self, broker, address, count=1, size=4): + Thread.__init__(self) + self.daemon = True + self.broker = broker + self.queue = address + self.count = count + self.size = size + self.done = False + + def run(self): + self.sender = subprocess.Popen(["qpid-send", + "--capacity=1", + "--content-size=%s" % self.size, + "--messages=%s" % self.count, + "--failover-updates", + "--connection-options={%s}"%(Cluster.CONNECTION_OPTIONS), + "--address=%s" % self.queue, + "--broker=%s" % self.broker.host_port()]) + self.sender.wait() + self.done = True + + cluster = self.cluster(2) + # create a queue with rather draconian flow control settings + ssn0 = cluster[0].connect().session() + s0 = ssn0.sender("flq; {create:always, node:{type:queue, x-declare:{arguments:{'qpid.flow_stop_count':100, 'qpid.flow_resume_count':20}}}}") + + # fire off the sending thread to broker[0], and wait until the queue + # hits flow control on broker[1] + sender = AsyncSender(cluster[0], "flq", count=110); + sender.start(); + + cluster[1].startQmf() + q_obj = [q for q in cluster[1].qmf_session.getObjects(_class="queue") if q.name == "flq"][0] + deadline = time.time() + 10 + while not q_obj.flowStopped and time.time() < deadline: + q_obj.update() + assert q_obj.flowStopped + assert not sender.done + assert q_obj.msgDepth < 110 + + # Now drain enough messages on broker[1] to drop below the flow stop + # threshold, but not relieve flow control... + receiver = subprocess.Popen(["qpid-receive", + "--messages=15", + "--timeout=1", + "--print-content=no", + "--failover-updates", + "--connection-options={%s}"%(Cluster.CONNECTION_OPTIONS), + "--ack-frequency=1", + "--address=flq", + "--broker=%s" % cluster[1].host_port()]) + receiver.wait() + q_obj.update() + assert q_obj.flowStopped + assert not sender.done + current_depth = q_obj.msgDepth + + # add a new broker to the cluster, and verify that the queue is in flow + # control on that broker + cluster.start() + cluster[2].startQmf() + q_obj = [q for q in cluster[2].qmf_session.getObjects(_class="queue") if q.name == "flq"][0] + assert q_obj.flowStopped + assert q_obj.msgDepth == current_depth + + # now drain the queue on broker[2], and verify that the sender becomes + # unblocked + receiver = subprocess.Popen(["qpid-receive", + "--messages=95", + "--timeout=1", + "--print-content=no", + "--failover-updates", + "--connection-options={%s}"%(Cluster.CONNECTION_OPTIONS), + "--ack-frequency=1", + "--address=flq", + "--broker=%s" % cluster[2].host_port()]) + receiver.wait() + q_obj.update() + assert not q_obj.flowStopped + self.assertEqual(q_obj.msgDepth, 0) + + # verify that the sender has become unblocked + sender.join(timeout=5) + assert not sender.isAlive() + assert sender.done + + def test_blocked_queue_delete(self): + """Verify that producers which are blocked on a queue due to flow + control are unblocked when that queue is deleted. 
+ """ + + cluster = self.cluster(2) + cluster[0].startQmf() + cluster[1].startQmf() + + # configure a queue with a specific flow limit on first broker + ssn0 = cluster[0].connect().session() + s0 = ssn0.sender("flq; {create:always, node:{type:queue, x-declare:{arguments:{'qpid.flow_stop_count':5, 'qpid.flow_resume_count':3}}}}") + q1 = [q for q in cluster[0].qmf_session.getObjects(_class="queue") if q.name == "flq"][0] + oid = q1.getObjectId() + self.assertEqual(q1.name, "flq") + self.assertEqual(q1.arguments, {u'qpid.flow_stop_count': 5L, u'qpid.flow_resume_count': 3L}) + assert not q1.flowStopped + self.assertEqual(q1.flowStoppedCount, 0) + + # fill the queue on one broker until flow control is active + for x in range(5): s0.send(Message(str(x))) + sender = ShortTests.BlockedSend(s0, Message(str(6))) + sender.start() # Tests that sender does block + # Verify the broker queue goes into a flowStopped state + deadline = time.time() + 1 + while not q1.flowStopped and time.time() < deadline: q1.update() + assert q1.flowStopped + self.assertEqual(q1.flowStoppedCount, 1) + sender.assert_blocked() # Still blocked + + # Now verify the both brokers in cluster have same configuration + qs = cluster[1].qmf_session.getObjects(_objectId=oid) + self.assertEqual(len(qs), 1) + q2 = qs[0] + self.assertEqual(q2.name, "flq") + self.assertEqual(q2.arguments, {u'qpid.flow_stop_count': 5L, u'qpid.flow_resume_count': 3L}) + assert q2.flowStopped + self.assertEqual(q2.flowStoppedCount, 1) + + # now delete the blocked queue from other broker + ssn1 = cluster[1].connect().session() + self.evaluate_address(ssn1, "flq;{delete:always}") + sender.wait() # Verify no longer blocked. + + ssn0.connection.close() + ssn1.connection.close() + cluster_test_logs.verify_logs() + + + def test_alternate_exchange_update(self): + """Verify that alternate-exchange on exchanges and queues is propagated to new members of a cluster. """ + cluster = self.cluster(1) + s0 = cluster[0].connect().session() + # create alt queue bound to amq.fanout exchange, will be destination for alternate exchanges + self.evaluate_address(s0, "alt;{create:always,node:{x-bindings:[{exchange:'amq.fanout',queue:alt}]}}") + # create direct exchange ex with alternate-exchange amq.fanout and no queues bound + self.evaluate_address(s0, "ex;{create:always,node:{type:topic, x-declare:{type:'direct', alternate-exchange:'amq.fanout'}}}") + # create queue q with alternate-exchange amq.fanout + self.evaluate_address(s0, "q;{create:always,node:{type:queue, x-declare:{alternate-exchange:'amq.fanout'}}}") + + def verify(broker): + s = broker.connect().session() + # Verify unmatched message goes to ex's alternate. + s.sender("ex").send("foo") + self.assertEqual("foo", s.receiver("alt").fetch(timeout=0).content) + # Verify rejected message goes to q's alternate. 
+ s.sender("q").send("bar") + msg = s.receiver("q").fetch(timeout=0) + self.assertEqual("bar", msg.content) + s.acknowledge(msg, Disposition(REJECTED)) # Reject the message + self.assertEqual("bar", s.receiver("alt").fetch(timeout=0).content) + + verify(cluster[0]) + cluster.start() + verify(cluster[1]) + + def test_binding_order(self): + """Regression test for binding order inconsistency in cluster""" + cluster = self.cluster(1) + c0 = cluster[0].connect() + s0 = c0.session() + # Declare multiple queues bound to same key on amq.topic + def declare(q,max=0): + if max: declare = 'x-declare:{arguments:{"qpid.max_count":%d, "qpid.flow_stop_count":0}}'%max + else: declare = 'x-declare:{}' + bind='x-bindings:[{queue:%s,key:key,exchange:"amq.topic"}]'%(q) + s0.sender("%s;{create:always,node:{%s,%s}}" % (q,declare,bind)) + declare('d',max=4) # Only one with a limit + for q in ['c', 'b','a']: declare(q) + # Add a cluster member, send enough messages to exceed the max count + cluster.start() + try: + s = s0.sender('amq.topic/key') + for m in xrange(1,6): s.send(Message(str(m))) + self.fail("Expected capacity exceeded exception") + except messaging.exceptions.TargetCapacityExceeded: pass + c1 = cluster[1].connect() + s1 = c1.session() + s0 = c0.session() # Old session s0 is broken by exception. + # Verify queue contents are consistent. + for q in ['a','b','c','d']: + self.assertEqual(self.browse(s0, q), self.browse(s1, q)) + # Verify queue contents are "best effort" + for q in ['a','b','c']: self.assert_browse(s1,q,[str(n) for n in xrange(1,6)]) + self.assert_browse(s1,'d',[str(n) for n in xrange(1,5)]) + + def test_deleted_exchange(self): + """QPID-3215: cached exchange reference can cause cluster inconsistencies + if exchange is deleted/recreated + Verify stand-alone case + """ + cluster = self.cluster() + # Verify we do not route message via an exchange that has been destroyed. + cluster.start() + s0 = cluster[0].connect().session() + self.evaluate_address(s0, "ex;{create:always,node:{type:topic}}") + self.evaluate_address(s0, "q;{create:always,node:{x-bindings:[{exchange:'ex',queue:q,key:foo}]}}") + send0 = s0.sender("ex/foo") + send0.send("foo") + self.assert_browse(s0, "q", ["foo"]) + self.evaluate_address(s0, "ex;{delete:always}") + try: + send0.send("bar") # Should fail, exchange is deleted. + self.fail("Expected not-found exception") + except qpid.messaging.NotFound: pass + self.assert_browse(cluster[0].connect().session(), "q", ["foo"]) + + def test_deleted_exchange_inconsistent(self): + """QPID-3215: cached exchange reference can cause cluster inconsistencies + if exchange is deleted/recreated + + Verify cluster inconsistency. 
+ """ + cluster = self.cluster() + cluster.start() + s0 = cluster[0].connect().session() + self.evaluate_address(s0, "ex;{create:always,node:{type:topic}}") + self.evaluate_address(s0, "q;{create:always,node:{x-bindings:[{exchange:'ex',queue:q,key:foo}]}}") + send0 = s0.sender("ex/foo") + send0.send("foo") + self.assert_browse(s0, "q", ["foo"]) + + cluster.start() + s1 = cluster[1].connect().session() + self.evaluate_address(s0, "ex;{delete:always}") + try: + send0.send("bar") + self.fail("Expected not-found exception") + except qpid.messaging.NotFound: pass + + self.assert_browse(s1, "q", ["foo"]) + + + def test_ttl_consistent(self): + """Ensure we don't get inconsistent errors with message that have TTL very close together""" + messages = [ Message(str(i), ttl=i/1000.0) for i in xrange(0,1000)] + messages.append(Message("x")) + cluster = self.cluster(2) + sender = cluster[0].connect().session().sender("q;{create:always}") + + def fetch(b): + receiver = b.connect().session().receiver("q;{create:always}") + while receiver.fetch().content != "x": pass + + for m in messages: sender.send(m, sync=False) + for m in messages: sender.send(m, sync=False) + fetch(cluster[0]) + fetch(cluster[1]) + for m in messages: sender.send(m, sync=False) + cluster.start() + fetch(cluster[2]) + +# Some utility code for transaction tests +XA_RBROLLBACK = 1 +XA_RBTIMEOUT = 2 +XA_OK = 0 +dtx_branch_counter = 0 + +class DtxStatusException(Exception): + def __init__(self, expect, actual): + self.expect = expect + self.actual = actual + + def str(self): + return "DtxStatusException(expect=%s, actual=%s)"%(self.expect, self.actual) + +class DtxTestFixture: + """Bundle together some common requirements for dtx tests.""" + def __init__(self, test, broker, name, exclusive=False): + self.test = test + self.broker = broker + self.name = name + # Use old API. DTX is not supported in messaging API. 
+class DtxTests(BrokerTest):
+
+    def test_dtx_update(self):
+        """Verify that DTX transaction state is updated to a new broker.
+        Start a collection of transactions, then add a new cluster member,
+        then verify they commit/rollback correctly on the new broker."""
+
+        # Note: multiple tests have been bundled into one to avoid the need
+        # to start/stop multiple brokers per test.
+
+        cluster=self.cluster(1)
+        sessions = [cluster[0].connect().session()] # For verify
+
+        # Transaction that will be open when new member joins, then committed.
+        t1 = DtxTestFixture(self, cluster[0], "t1")
+        t1.start()
+        t1.send(["1", "2"])
+        t1.verify(sessions, []) # Not visible outside of transaction
+
+        # Transaction that will be open when new member joins, then rolled back.
+        t2 = DtxTestFixture(self, cluster[0], "t2")
+        t2.start()
+        t2.send(["1", "2"])
+
+        # Transaction that will be prepared when new member joins, then committed.
+        t3 = DtxTestFixture(self, cluster[0], "t3")
+        t3.start()
+        t3.send(["1", "2"])
+        t3.end()
+        t3.prepare()
+        t1.verify(sessions, []) # Not visible outside of transaction
+
+        # Transaction that will be prepared when new member joins, then rolled back.
+        t4 = DtxTestFixture(self, cluster[0], "t4")
+        t4.start()
+        t4.send(["1", "2"])
+        t4.end()
+        t4.prepare()
+
+        # Transaction using an exclusive queue
+        t5 = DtxTestFixture(self, cluster[0], "t5", exclusive=True)
+        t5.start()
+        t5.send(["1", "2"])
+
+        # Accept messages in a transaction before/after join then commit
+        t6 = DtxTestFixture(self, cluster[0], "t6")
+        t6.send(["a","b","c"])
+        t6.start()
+        self.assertEqual(t6.accept().body, "a")
+
+        # Accept messages in a transaction before/after join then roll back
+        t7 = DtxTestFixture(self, cluster[0], "t7")
+        t7.send(["a","b","c"])
+        t7.start()
+        self.assertEqual(t7.accept().body, "a")
+
+        # Ended, suspended transactions across join.
+        t8 = DtxTestFixture(self, cluster[0], "t8")
+        t8.start(id="1")
+        t8.send(["x"])
+        t8.end(id="1", suspend=True)
+        t8.start(id="2")
+        t8.send(["y"])
+        t8.end(id="2")
+        t8.start()
+        t8.send("z")
+
+        # Start new cluster member
+        cluster.start()
+        sessions.append(cluster[1].connect().session())
+
+        # Commit t1
+        t1.send(["3","4"])
+        t1.verify(sessions, [])
+        t1.end()
+        t1.commit(one_phase=True)
+        t1.verify(sessions, ["1","2","3","4"])
+
+        # Rollback t2
+        t2.send(["3","4"])
+        t2.end()
+        t2.rollback()
+        t2.verify(sessions, [])
+
+        # Commit t3
+        t3.commit(one_phase=False)
+        t3.verify(sessions, ["1","2"])
+
+        # Rollback t4
+        t4.rollback()
+        t4.verify(sessions, [])
+
+        # Commit t5
+        t5.send(["3","4"])
+        t5.verify(sessions, [])
+        t5.end()
+        t5.commit(one_phase=True)
+        t5.verify(sessions, ["1","2","3","4"])
+
+        # Commit t6
+        self.assertEqual(t6.accept().body, "b")
+        t6.verify(sessions, ["c"])
+        t6.end()
+        t6.commit(one_phase=True)
+        t6.session.close() # Make sure they're not requeued by the session.
+        t6.verify(sessions, ["c"])
+
+        # Rollback t7
+        self.assertEqual(t7.accept().body, "b")
+        t7.end()
+        t7.rollback()
+        t7.verify(sessions, ["a", "b", "c"])
+
+        # Resume t8
+        t8.end()
+        t8.commit(one_phase=True)
+        t8.start("1", resume=True)
+        t8.end("1")
+        t8.commit("1", one_phase=True)
+        t8.commit("2", one_phase=True)
+        t8.verify(sessions, ["z", "x","y"])
+
+    def test_dtx_failover_rollback(self):
+        """Kill a broker during a transaction, verify we roll back correctly"""
+        cluster=self.cluster(1, expect=EXPECT_EXIT_FAIL)
+        cluster.start(expect=EXPECT_RUNNING)
+
+        # Test unprepared at crash
+        t1 = DtxTestFixture(self, cluster[0], "t1")
+        t1.send(["a"]) # Not in transaction
+        t1.start()
+        t1.send(["b"]) # In transaction
+
+        # Test prepared at crash
+        t2 = DtxTestFixture(self, cluster[0], "t2")
+        t2.send(["a"]) # Not in transaction
+        t2.start()
+        t2.send(["b"]) # In transaction
+        t2.end()
+        t2.prepare()
+
+        # Crash the broker
+        cluster[0].kill()
+
+        # Transactional changes should not appear
+        s = cluster[1].connect().session()
+        self.assert_browse(s, "t1", ["a"])
+        self.assert_browse(s, "t2", ["a"])
+
+    def test_dtx_timeout(self):
+        """Verify that dtx timeout works"""
+        cluster = self.cluster(1)
+        t1 = DtxTestFixture(self, cluster[0], "t1")
+        t1.start()
+        t1.set_timeout(1)
+        time.sleep(1.1)
+        try:
+            t1.end()
+            self.fail("Expected rollback timeout.")
+        except DtxStatusException, e:
+            self.assertEqual(e.actual, XA_RBTIMEOUT)
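The suspend/resume choreography that t8 exercises inside test_dtx_update above can be hard to follow in the long test; reduced to its skeleton (a sketch, same hypothetical fixture usage as before):

    # Sketch: park one dtx branch, work in another, then resume the first.
    t = DtxTestFixture(self, cluster[0], "demo2")
    t.start(id="b1")
    t.send(["x"])
    t.end(id="b1", suspend=True)   # b1 parked; its work is retained
    t.start(id="b2")
    t.send(["y"])
    t.end(id="b2")                 # b2 complete
    t.start(id="b1", resume=True)  # re-associate the session with b1
    t.end(id="b1")
    t.commit(id="b1", one_phase=True)
    t.commit(id="b2", one_phase=True)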
+class TxTests(BrokerTest):
+
+    def test_tx_update(self):
+        """Verify that transaction state is updated to a new broker"""
+
+        def make_message(session, body=None, key=None, id=None):
+            dp=session.delivery_properties(routing_key=key)
+            mp=session.message_properties(correlation_id=id)
+            return qpid.datatypes.Message(dp, mp, body)
+
+        cluster=self.cluster(1)
+        # Use old API. TX is not supported in messaging API.
+        c = cluster[0].connect_old()
+        s = c.session("tx-session", 1)
+        s.queue_declare(queue="q")
+        # Start transaction
+        s.tx_select()
+        s.message_transfer(message=make_message(s, "1", "q"))
+        # Start new member mid-transaction
+        cluster.start()
+        # Do more work
+        s.message_transfer(message=make_message(s, "2", "q"))
+        # Commit the transaction and verify the results.
+        s.tx_commit()
+        for b in cluster: self.assert_browse(b.connect().session(), "q", ["1","2"])
+
 class LongTests(BrokerTest):
     """Tests that can run for a long time if -DDURATION=<minutes> is set"""
     def duration(self):
@@ -316,22 +1023,28 @@ class LongTests(BrokerTest):
         # Original cluster will all be killed so expect exit with failure
         cluster = self.cluster(3, expect=EXPECT_EXIT_FAIL)
+        for b in cluster: b.ready() # Wait for brokers to be ready
        for b in cluster: ErrorGenerator(b)
 
         # Start sender and receiver threads
         cluster[0].declare_queue("test-queue")
-        sender = NumberedSender(cluster[1], 1000) # Max queue depth
-        receiver = NumberedReceiver(cluster[2], sender)
+        sender = NumberedSender(cluster[0], 1000) # Max queue depth
+        receiver = NumberedReceiver(cluster[0], sender)
         receiver.start()
         sender.start()
+        # Wait for sender & receiver to get up and running
+        retry(lambda: receiver.received > 0)
 
         # Kill original brokers, start new ones for the duration.
         endtime = time.time() + self.duration()
         i = 0
         while time.time() < endtime:
+            sender.sender.assert_running()
+            receiver.receiver.assert_running()
             cluster[i].kill()
             i += 1
             b = cluster.start(expect=EXPECT_EXIT_FAIL)
+            for b in cluster[i:]: b.ready()
             ErrorGenerator(b)
             time.sleep(5)
         sender.stop()
@@ -362,24 +1075,24 @@ class LongTests(BrokerTest):
                     if self.stopped: break
                     self.process = self.broker.test.popen(
                         self.cmd, expect=EXPECT_UNKNOWN)
-                finally: self.lock.release()
-                try: exit = self.process.wait()
+                finally:
+                    self.lock.release()
+                try:
+                    exit = self.process.wait()
                 except OSError, e:
-                    # Seems to be a race in wait(), it throws
-                    # "no such process" during test shutdown.
-                    # Doesn't indicate a test error, ignore.
-                    return
+                    # Process may already have been killed by self.stop()
+                    break
                 except Exception, e:
                     self.process.unexpected(
                         "client of %s: %s"%(self.broker.name, e))
                 self.lock.acquire()
                 try:
-                    # Quit and ignore errors if stopped or expecting failure.
                     if self.stopped: break
                     if exit != 0:
                         self.process.unexpected(
                             "client of %s exit code %s"%(self.broker.name, exit))
-                finally: self.lock.release()
+                finally:
+                    self.lock.release()
         except Exception, e:
             self.error = RethrownException("Error in ClientLoop.run")
@@ -401,7 +1114,7 @@ class LongTests(BrokerTest):
             args += ["--log-enable=trace+:management"]
         # Use store if present.
         if BrokerTest.store_lib: args +=["--load-module", BrokerTest.store_lib]
-        cluster = self.cluster(3, args)
+        cluster = self.cluster(3, args, expect=EXPECT_EXIT_FAIL) # brokers will be killed
 
         clients = [] # Per-broker list of clients that only connect to one broker.
         mclients = [] # Management clients that connect to every broker in the cluster.
@@ -410,10 +1123,12 @@ class LongTests(BrokerTest):
             """Start ordinary clients for a broker."""
             cmds=[
                 ["qpid-tool", "localhost:%s"%(broker.port())],
-                ["qpid-perftest", "--count", 50000,
+                ["qpid-perftest", "--count=5000", "--durable=yes",
                  "--base-name", str(qpid.datatypes.uuid4()), "--port", broker.port()],
-                ["qpid-queue-stats", "-a", "localhost:%s" %(broker.port())],
-                ["testagent", "localhost", str(broker.port())] ]
+                ["qpid-txtest", "--queue-base-name", "tx-%s"%str(qpid.datatypes.uuid4()),
+                 "--port", broker.port()],
+                ["qpid-queue-stats", "-a", "localhost:%s" %(broker.port())]
+                ]
             clients.append([ClientLoop(broker, cmd) for cmd in cmds])
 
         def start_mclients(broker):
@@ -422,7 +1137,8 @@
             mclients.append(ClientLoop(broker, cmd))
 
         endtime = time.time() + self.duration()
-        runtime = self.duration() / 4 # First run is longer, use quarter of duration.
+        # For long duration, the first run is a quarter of the duration.
+        runtime = min(5.0, self.duration() / 3.0)
         alive = 0 # First live cluster member
         for i in range(len(cluster)): start_clients(cluster[i])
         start_mclients(cluster[alive])
@@ -433,7 +1149,7 @@
             for b in cluster[alive:]: b.ready() # Check if a broker crashed.
             # Kill the first broker, expect the clients to fail.
             b = cluster[alive]
-            b.expect = EXPECT_EXIT_FAIL
+            b.ready()
             b.kill()
             # Stop the brokers clients and all the mclients.
             for c in clients[alive] + mclients:
@@ -443,26 +1159,251 @@
             mclients = []
             # Start another broker and clients
             alive += 1
-            cluster.start()
+            cluster.start(expect=EXPECT_EXIT_FAIL)
+            cluster[-1].ready() # Wait till it's ready
             start_clients(cluster[-1])
             start_mclients(cluster[alive])
         for c in chain(mclients, *clients):
             c.stop()
-
+        for b in cluster[alive:]:
+            b.ready() # Verify still alive
+            b.kill()
         # Verify that logs are consistent
         cluster_test_logs.verify_logs()
 
     def test_management_qmf2(self):
         self.test_management(args=["--mgmt-qmf2=yes"])
 
-    def test_connect_consistent(self): # FIXME aconway 2011-01-18:
+    def test_connect_consistent(self):
         args=["--mgmt-pub-interval=1","--log-enable=trace+:management"]
         cluster = self.cluster(2, args=args)
         end = time.time() + self.duration()
         while (time.time() < end): # Get a management interval
             for i in xrange(1000): cluster[0].connect().close()
+        cluster_test_logs.verify_logs()
+
+    def test_flowlimit_failover(self):
+        """Test fail-over during continuous send-receive with flow control
+        active.
+        """
+
+        # Original cluster will all be killed so expect exit with failure
+        cluster = self.cluster(3, expect=EXPECT_EXIT_FAIL)
+        for b in cluster: b.ready() # Wait for brokers to be ready
+
+        # create a queue with rather draconian flow control settings
+        ssn0 = cluster[0].connect().session()
+        s0 = ssn0.sender("test-queue; {create:always, node:{type:queue, x-declare:{arguments:{'qpid.flow_stop_count':2000, 'qpid.flow_resume_count':100}}}}")
+
+        receiver = NumberedReceiver(cluster[0])
+        receiver.start()
+        senders = [NumberedSender(cluster[0]) for i in range(1,3)]
+        for s in senders:
+            s.start()
+        # Wait for senders & receiver to get up and running
+        retry(lambda: receiver.received > 2*len(senders))
+
+        # Kill original brokers, start new ones for the duration.
+        endtime = time.time() + self.duration()
+        i = 0
+        while time.time() < endtime:
+            for s in senders: s.sender.assert_running()
+            receiver.receiver.assert_running()
+            for b in cluster[i:]: b.ready() # Check if any broker crashed.
+            cluster[i].kill()
+            i += 1
+            b = cluster.start(expect=EXPECT_EXIT_FAIL)
+            time.sleep(5)
+        for s in senders:
+            s.stop()
+        receiver.stop()
+        for i in range(i, len(cluster)): cluster[i].kill()
+
+    def test_ttl_failover(self):
+        """Test that messages with TTL don't cause problems in a cluster with failover"""
+
+        class Client(StoppableThread):
+
+            def __init__(self, broker):
+                StoppableThread.__init__(self)
+                self.connection = broker.connect(reconnect=True)
+                self.auto_fetch_reconnect_urls(self.connection)
+                self.session = self.connection.session()
+
+            def auto_fetch_reconnect_urls(self, conn):
+                """Replacement for the qpid.messaging.util version, which is noisy"""
+                ssn = conn.session("auto-fetch-reconnect-urls")
+                rcv = ssn.receiver("amq.failover")
+                rcv.capacity = 10
+
+                def main():
+                    while True:
+                        try:
+                            msg = rcv.fetch()
+                            qpid.messaging.util.set_reconnect_urls(conn, msg)
+                            ssn.acknowledge(msg, sync=False)
+                        except messaging.exceptions.LinkClosed: return
+                        except messaging.exceptions.ConnectionError: return
+
+                thread = Thread(name="auto-fetch-reconnect-urls", target=main)
+                thread.setDaemon(True)
+                thread.start()
+
+            def stop(self):
+                StoppableThread.stop(self)
+                self.connection.detach()
+
+        class Sender(Client):
+            def __init__(self, broker, address):
+                Client.__init__(self, broker)
+                self.sent = 0 # Number of messages _reliably_ sent.
+                self.sender = self.session.sender(address, capacity=1000)
+
+            def send_counted(self, ttl):
+                self.sender.send(Message(str(self.sent), ttl=ttl))
+                self.sent += 1
+
+            def run(self):
+                while not self.stopped:
+                    choice = random.randint(0,4)
+                    if choice == 0: self.send_counted(None) # No ttl
+                    elif choice == 1: self.send_counted(100000) # Large ttl
+                    else: # Small ttl, might expire
+                        self.sender.send(Message("", ttl=random.random()/10))
+                self.sender.send(Message("z"), sync=True) # Chaser.
+
+        class Receiver(Client):
+
+            def __init__(self, broker, address):
+                Client.__init__(self, broker)
+                self.received = 0 # Number of non-empty (reliable) messages received.
+                self.receiver = self.session.receiver(address, capacity=1000)
+
+            def run(self):
+                try:
+                    while True:
+                        m = self.receiver.fetch(1)
+                        if m.content == "z": break
+                        if m.content: # Ignore unreliable messages
+                            # Ignore duplicates
+                            if int(m.content) == self.received: self.received += 1
+                except Exception,e: self.error = e
+
+        # def test_ttl_failover
+
+        # Original cluster will all be killed so expect exit with failure.
+        # Set a small purge interval.
+        cluster = self.cluster(3, expect=EXPECT_EXIT_FAIL, args=["--queue-purge-interval=1"])
+        for b in cluster: b.ready() # Wait for brokers to be ready
+
+        # Python client failover produces noisy WARN logs, disable temporarily
+        logger = logging.getLogger()
+        log_level = logger.getEffectiveLevel()
+        logger.setLevel(logging.ERROR)
+        sender = None
+        receiver = None
+        try:
+            # Start sender and receiver threads
+            receiver = Receiver(cluster[0], "q;{create:always}")
+            receiver.start()
+            sender = Sender(cluster[0], "q;{create:always}")
+            sender.start()
+            # Wait for sender & receiver to get up and running
+            retry(lambda: receiver.received > 0)
+
+            # Kill brokers in a cycle.
+            endtime = time.time() + self.duration()
+            runtime = min(5.0, self.duration() / 4.0)
+            i = 0
+            while time.time() < endtime:
+                for b in cluster[i:]: b.ready() # Check if any broker crashed.
+                cluster[i].kill()
+                i += 1
+                b = cluster.start(expect=EXPECT_EXIT_FAIL)
+                b.ready()
+                time.sleep(runtime)
+            sender.stop()
+            receiver.stop()
+            for b in cluster[i:]:
+                b.ready() # Check it didn't crash
+                b.kill()
+            self.assertEqual(sender.sent, receiver.received)
             cluster_test_logs.verify_logs()
+        finally:
+            # Detach to avoid slow reconnect attempts during shut-down if the test fails.
+            if sender: sender.connection.detach()
+            if receiver: receiver.connection.detach()
+            logger.setLevel(log_level)
+
+    def test_msg_group_failover(self):
+        """Test fail-over during continuous send-receive of grouped messages.
+        """
+
+        class GroupedTrafficGenerator(Thread):
+            def __init__(self, url, queue, group_key):
+                Thread.__init__(self)
+                self.url = url
+                self.queue = queue
+                self.group_key = group_key
+                self.status = -1
+
+            def run(self):
+                # generate traffic for approx 10 seconds (2011 msgs / 200 per sec)
+                cmd = ["msg_group_test",
+                       "--broker=%s" % self.url,
+                       "--address=%s" % self.queue,
+                       "--connection-options={%s}" % (Cluster.CONNECTION_OPTIONS),
+                       "--group-key=%s" % self.group_key,
+                       "--receivers=2",
+                       "--senders=3",
+                       "--messages=2011",
+                       "--send-rate=200",
+                       "--capacity=11",
+                       "--ack-frequency=23",
+                       "--allow-duplicates",
+                       "--group-size=37",
+                       "--randomize-group-size",
+                       "--interleave=13"]
+                       # "--trace"]
+                self.generator = Popen(cmd)
+                self.status = self.generator.wait()
+                return self.status
+
+            def results(self):
+                self.join(timeout=30) # 3x assumed duration
+                if self.isAlive(): return -1
+                return self.status
+
+        # Original cluster will all be killed so expect exit with failure
+        cluster = self.cluster(3, expect=EXPECT_EXIT_FAIL, args=["-t"])
+        for b in cluster: b.ready() # Wait for brokers to be ready
+
+        # create a queue configured for shared message groups
+        ssn0 = cluster[0].connect().session()
+        q_args = "{'qpid.group_header_key':'group-id', 'qpid.shared_msg_group':1}"
+        s0 = ssn0.sender("test-group-q; {create:always, node:{type:queue, x-declare:{arguments:%s}}}" % q_args)
+
+        # Kill original brokers, start new ones for the duration.
+        endtime = time.time() + self.duration()
+        i = 0
+        while time.time() < endtime:
+            traffic = GroupedTrafficGenerator(cluster[i].host_port(),
+                                              "test-group-q", "group-id")
+            traffic.start()
+            time.sleep(1)
+
+            for x in range(2):
+                for b in cluster[i:]: b.ready() # Check if any broker crashed.
+                cluster[i].kill()
+                i += 1
+                b = cluster.start(expect=EXPECT_EXIT_FAIL)
+                time.sleep(1)
+
+            # wait for traffic to finish, verify success
+            self.assertEqual(0, traffic.results())
+
+        for i in range(i, len(cluster)): cluster[i].kill()
+
 class StoreTests(BrokerTest):
     """