summaryrefslogtreecommitdiff
path: root/cpp/src/tests
diff options
context:
space:
mode:
authorAlan Conway <aconway@apache.org>2009-08-10 21:10:53 +0000
committerAlan Conway <aconway@apache.org>2009-08-10 21:10:53 +0000
commit9a0521e5562ba6bf5c3468eb171c109e166cfa5d (patch)
treea0f3e6be8d28b634a0e2ee94d5fd853dc3562ffb /cpp/src/tests
parent5518c23f9a69f6d616abc770bda0677b2f0b51ac (diff)
downloadqpid-python-9a0521e5562ba6bf5c3468eb171c109e166cfa5d.tar.gz
Watchdog feature to remove unresponsive cluster nodes.
In some instances (e.g. while resolving an error) it's possible for a hung process to hang the entire cluster as the other members wait for its response. The cluster can handle terminated processes but hung processes present a problem. If the watchdog plugin is loaded and --watchdog-interval is set then the broker forks a child process that runs a very simple watchdog program, and starts a timer in the broker process to signal the watchdog every interval/2 seconds. The watchdog kills its parent if it does not receive a signal for interval seconds. This allows a stuck broker to be removed from the cluster so other cluster members can continue. git-svn-id: https://svn.apache.org/repos/asf/qpid/trunk/qpid@802927 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'cpp/src/tests')
-rw-r--r--cpp/src/tests/cluster.mk68
-rwxr-xr-xcpp/src/tests/test_watchdog16
2 files changed, 51 insertions, 33 deletions
diff --git a/cpp/src/tests/cluster.mk b/cpp/src/tests/cluster.mk
index dc592fa4d5..6fc4c64a5e 100644
--- a/cpp/src/tests/cluster.mk
+++ b/cpp/src/tests/cluster.mk
@@ -29,44 +29,46 @@ if HAVE_LIBCPG
# ais_check checks pre-requisites for cluster tests and runs them if ok.
-TESTS += \
- ais_check \
- run_cluster_tests \
- federated_cluster_test \
+TESTS += \
+ ais_check \
+ test_watchdog \
+ run_cluster_tests \
+ federated_cluster_test \
clustered_replication_test
-
-EXTRA_DIST += \
- ais_check \
- start_cluster \
- stop_cluster \
- restart_cluster \
- cluster_python_tests \
- cluster_python_tests_failing.txt \
- federated_cluster_test \
- clustered_replication_test \
- run_cluster_tests \
- run_long_cluster_tests \
- testlib.py \
- cluster_tests.py \
- long_cluster_tests.py
-
-LONG_TESTS += \
- run_long_cluster_tests \
- start_cluster \
- cluster_python_tests \
+EXTRA_DIST += \
+ ais_check test_watchdog \
+ start_cluster \
+ stop_cluster \
+ restart_cluster \
+ cluster_python_tests \
+ cluster_python_tests_failing.txt \
+ federated_cluster_test \
+ clustered_replication_test \
+ run_cluster_tests \
+ run_long_cluster_tests \
+ testlib.py \
+ cluster_tests.py \
+ long_cluster_tests.py
+
+LONG_TESTS += \
+ run_long_cluster_tests \
+ start_cluster \
+ cluster_python_tests \
stop_cluster
qpidtest_PROGRAMS += cluster_test
-cluster_test_SOURCES = \
- cluster_test.cpp \
- unit_test.cpp \
- ClusterFixture.cpp \
- ClusterFixture.h \
- ForkedBroker.h \
- ForkedBroker.cpp \
- PartialFailure.cpp \
- ClusterFailover.cpp
+
+cluster_test_SOURCES = \
+ cluster_test.cpp \
+ unit_test.cpp \
+ ClusterFixture.cpp \
+ ClusterFixture.h \
+ ForkedBroker.h \
+ ForkedBroker.cpp \
+ PartialFailure.cpp \
+ ClusterFailover.cpp
+
cluster_test_LDADD=$(lib_client) $(lib_broker) -lboost_unit_test_framework
qpidtest_SCRIPTS += run_cluster_tests cluster_tests.py run_long_cluster_tests long_cluster_tests.py testlib.py
diff --git a/cpp/src/tests/test_watchdog b/cpp/src/tests/test_watchdog
new file mode 100755
index 0000000000..c2f33501b8
--- /dev/null
+++ b/cpp/src/tests/test_watchdog
@@ -0,0 +1,16 @@
+#!/bin/sh
+# Tests for the watchdog plug-in
+
+# Start a broker with watchdog, freeze it with kill -STOP, verify that it is killed.
+export QPID_WATCHDOG_EXE=$PWD/../qpidd_watchdog
+PORT=`../qpidd -dp0 --no-data-dir --auth=no --no-module-dir --load-module $PWD/../.libs/watchdog.so --log-to-file=qpidd_watchdog.log --watchdog-interval 1`
+PID=`../qpidd -cp $PORT`
+kill -STOP $PID
+sleep 2
+
+if kill -0 $PID 2>/dev/null; then
+ echo "Hung process did not die."
+ kill $PID
+else
+ true
+fi