summaryrefslogtreecommitdiff
path: root/scripts/rabbitmq-server-ha.ocf
diff options
context:
space:
mode:
authorBogdan Dobrelya <bdobrelia@mirantis.com>2016-02-04 12:34:39 +0100
committerBogdan Dobrelya <bdobrelia@mirantis.com>2016-02-04 12:34:39 +0100
commit4b08c2875809b05fd5fc8cf2e3c22684a1a6c548 (patch)
treeff272540ec0702f31899452eda0c9d8635c4ac7a /scripts/rabbitmq-server-ha.ocf
parentf9adbe0beb355f40194c7cc6b548b6c1f40228c5 (diff)
downloadrabbitmq-server-git-4b08c2875809b05fd5fc8cf2e3c22684a1a6c548.tar.gz
Fix action_stop for the rabbit OCF
The action_stop may sometimes stop the rabbitmq-server gracefully by its PID but leave unresponsive beam.smp processes running and interfering with RabbitMQ. Those processes must be stopped as well. The solution is: - make proc_stop() accept pid=none to stop by name matching instead - make kill_rmq_and_remove_pid() also stop by matching the beam process name - fix stop_server_process() to ensure that no beam process is left running Related Fuel bug: https://launchpad.net/bugs/1541029 Signed-off-by: Bogdan Dobrelya <bdobrelia@mirantis.com>
Diffstat (limited to 'scripts/rabbitmq-server-ha.ocf')
-rwxr-xr-xscripts/rabbitmq-server-ha.ocf40
1 files changed, 25 insertions, 15 deletions
diff --git a/scripts/rabbitmq-server-ha.ocf b/scripts/rabbitmq-server-ha.ocf
index 78629547f7..5505c10581 100755
--- a/scripts/rabbitmq-server-ha.ocf
+++ b/scripts/rabbitmq-server-ha.ocf
@@ -407,8 +407,8 @@ proc_kill()
# OCF_SUCCESS
# LL
# Arguments:
-# $1 - pidfile or pid
-# $2 - service name used for logging
+# $1 - pidfile or pid or 'none', if stopping by the name matching
+# $2 - service name used for logging or for the fallback stopping method
# $3 - stop process timeout (in sec), used to determine how many times we try
# SIGTERM and an upper limit on how long this function should try and
# stop the process. Defaults to 15.
@@ -425,16 +425,20 @@ proc_stop()
local i
local pid
local pidfile
- # check if provide just a number
- echo "${pid_param}" | egrep -q '^[0-9]+$'
- if [ $? -eq 0 ]; then
- pid="${pid_param}"
- elif [ -e "${pid_param}" ]; then # check if passed in a pid file
- pidfile="${pid_param}"
- pid=$(cat "${pidfile}" 2>/dev/null | tr -s " " "\n" | sort -u)
- else
- ocf_log warn "${LH} pid param ${pid_param} is not a file or a number, try match by ${service_name}"
+ if [ "${pid_param}" = "none" ] ; then
pid="none"
+ else
+ # check if provide just a number
+ echo "${pid_param}" | egrep -q '^[0-9]+$'
+ if [ $? -eq 0 ]; then
+ pid="${pid_param}"
+ elif [ -e "${pid_param}" ]; then # check if passed in a pid file
+ pidfile="${pid_param}"
+ pid=$(cat "${pidfile}" 2>/dev/null | tr -s " " "\n" | sort -u)
+ else
+ ocf_log warn "${LH} pid param ${pid_param} is not a file or a number, try match by ${service_name}"
+ pid="none"
+ fi
fi
# number of times to try a SIGTERM is (timeout - 5 seconds) / 2 seconds
local stop_count=$(( ($timeout-5)/2 ))
@@ -790,10 +794,14 @@ update_cookie() {
return $OCF_SUCCESS
}
-# Stop rmq beam process by pid or rabbit node name match. Returns SUCCESS/ERROR
+# Stop rmq beam process by pid and by rabbit node name match. Returns SUCCESS/ERROR
kill_rmq_and_remove_pid() {
local LH="${LL} kill_rmq_and_remove_pid():"
+ # Stop the rabbitmq-server by its pidfile, use the name matching as a fallback,
+ # and ignore the exit code
proc_stop "${OCF_RESKEY_pid_file}" "beam.*${RABBITMQ_NODENAME}" "${OCF_RESKEY_stop_time}"
+ # Ensure the beam.smp stopped by the rabbit node name matching as well
+ proc_stop none "beam.*${RABBITMQ_NODENAME}" "${OCF_RESKEY_stop_time}"
if [ $? -eq 0 ] ; then
return $OCF_SUCCESS
else
@@ -967,9 +975,11 @@ stop_server_process() {
[ $? -eq 0 ] && ocf_log info "${LH} RMQ-server process (PID=${pid}) stopped succesfully."
fi
- if [ -f ${OCF_RESKEY_pid_file} ] ; then
- # Ensure there is no beam process and pidfile left
- ocf_log warn "${LH} The pidfile still exists, forcing the RMQ-server cleanup"
+ # Ensure there is no beam process and pidfile left
+ pgrep -f "beam.*${RABBITMQ_NODENAME}" > /dev/null
+ rc=$?
+ if [ -f ${OCF_RESKEY_pid_file} -o $rc -eq 0 ] ; then
+ ocf_log warn "${LH} The pidfile or beam's still exist, forcing the RMQ-server cleanup"
kill_rmq_and_remove_pid
fi