summaryrefslogtreecommitdiff
path: root/qpid/cpp/src/tests
diff options
context:
space:
mode:
authorAlan Conway <aconway@apache.org>2009-08-10 21:10:53 +0000
committerAlan Conway <aconway@apache.org>2009-08-10 21:10:53 +0000
commitf1ce462da088bba87d891014c5d8b5c9eeaca238 (patch)
tree89633a0a0a866c96250104ee064011e172868003 /qpid/cpp/src/tests
parent9d4cbbe253442fc625a4d2bf61314df7fa26e842 (diff)
downloadqpid-python-f1ce462da088bba87d891014c5d8b5c9eeaca238.tar.gz
Watchdog feature to remove unresponsive cluster nodes.
In some instances (e.g. while resolving an error) it's possible for a hung process to hang the entire cluster as the other members wait for its response. The cluster can handle terminated processes, but hung processes present a problem. If the watchdog plugin is loaded and --watchdog-interval is set, then the broker forks a child process that runs a very simple watchdog program, and starts a timer in the broker process to signal the watchdog every interval/2 seconds. The watchdog kills its parent if it does not receive a signal for interval seconds. This allows a stuck broker to be removed from the cluster so other cluster members can continue. git-svn-id: https://svn.apache.org/repos/asf/qpid/trunk@802927 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'qpid/cpp/src/tests')
-rw-r--r--qpid/cpp/src/tests/cluster.mk68
-rwxr-xr-xqpid/cpp/src/tests/test_watchdog16
2 files changed, 51 insertions, 33 deletions
diff --git a/qpid/cpp/src/tests/cluster.mk b/qpid/cpp/src/tests/cluster.mk
index dc592fa4d5..6fc4c64a5e 100644
--- a/qpid/cpp/src/tests/cluster.mk
+++ b/qpid/cpp/src/tests/cluster.mk
@@ -29,44 +29,46 @@ if HAVE_LIBCPG
# ais_check checks pre-requisites for cluster tests and runs them if ok.
-TESTS += \
- ais_check \
- run_cluster_tests \
- federated_cluster_test \
+TESTS += \
+ ais_check \
+ test_watchdog \
+ run_cluster_tests \
+ federated_cluster_test \
clustered_replication_test
-
-EXTRA_DIST += \
- ais_check \
- start_cluster \
- stop_cluster \
- restart_cluster \
- cluster_python_tests \
- cluster_python_tests_failing.txt \
- federated_cluster_test \
- clustered_replication_test \
- run_cluster_tests \
- run_long_cluster_tests \
- testlib.py \
- cluster_tests.py \
- long_cluster_tests.py
-
-LONG_TESTS += \
- run_long_cluster_tests \
- start_cluster \
- cluster_python_tests \
+EXTRA_DIST += \
+ ais_check test_watchdog \
+ start_cluster \
+ stop_cluster \
+ restart_cluster \
+ cluster_python_tests \
+ cluster_python_tests_failing.txt \
+ federated_cluster_test \
+ clustered_replication_test \
+ run_cluster_tests \
+ run_long_cluster_tests \
+ testlib.py \
+ cluster_tests.py \
+ long_cluster_tests.py
+
+LONG_TESTS += \
+ run_long_cluster_tests \
+ start_cluster \
+ cluster_python_tests \
stop_cluster
qpidtest_PROGRAMS += cluster_test
-cluster_test_SOURCES = \
- cluster_test.cpp \
- unit_test.cpp \
- ClusterFixture.cpp \
- ClusterFixture.h \
- ForkedBroker.h \
- ForkedBroker.cpp \
- PartialFailure.cpp \
- ClusterFailover.cpp
+
+cluster_test_SOURCES = \
+ cluster_test.cpp \
+ unit_test.cpp \
+ ClusterFixture.cpp \
+ ClusterFixture.h \
+ ForkedBroker.h \
+ ForkedBroker.cpp \
+ PartialFailure.cpp \
+ ClusterFailover.cpp
+
cluster_test_LDADD=$(lib_client) $(lib_broker) -lboost_unit_test_framework
qpidtest_SCRIPTS += run_cluster_tests cluster_tests.py run_long_cluster_tests long_cluster_tests.py testlib.py
diff --git a/qpid/cpp/src/tests/test_watchdog b/qpid/cpp/src/tests/test_watchdog
new file mode 100755
index 0000000000..c2f33501b8
--- /dev/null
+++ b/qpid/cpp/src/tests/test_watchdog
@@ -0,0 +1,16 @@
+#!/bin/sh
+# Tests for the watchdog plug-in. Exits non-zero if a frozen broker survives.
+
+# Start a broker with watchdog, freeze it with kill -STOP, verify that it is killed.
+export QPID_WATCHDOG_EXE=$PWD/../qpidd_watchdog
+PORT=`../qpidd -dp0 --no-data-dir --auth=no --no-module-dir --load-module $PWD/../.libs/watchdog.so --log-to-file=qpidd_watchdog.log --watchdog-interval 1`
+PID=`../qpidd -cp $PORT`
+kill -STOP $PID
+sleep 2
+
+if kill -0 $PID 2>/dev/null; then
+ # Broker is still stopped: SIGTERM would stay pending, so clean up with SIGKILL.
+ echo "Hung process did not die."
+ kill -KILL $PID
+ exit 1
+fi