summary refs log tree commit diff
path: root/scripts/rabbitmq-server-ha.ocf
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/rabbitmq-server-ha.ocf')
-rwxr-xr-x scripts/rabbitmq-server-ha.ocf 84
1 files changed, 46 insertions, 38 deletions
diff --git a/scripts/rabbitmq-server-ha.ocf b/scripts/rabbitmq-server-ha.ocf
index d1088bc42d..5505c10581 100755
--- a/scripts/rabbitmq-server-ha.ocf
+++ b/scripts/rabbitmq-server-ha.ocf
@@ -407,8 +407,8 @@ proc_kill()
# OCF_SUCCESS
# LL
# Arguments:
-# $1 - pidfile or pid
-# $2 - service name used for logging
+# $1 - pidfile or pid, or 'none' to stop by name matching only
+# $2 - service name used for logging and for the fallback (name matching) stopping method
# $3 - stop process timeout (in sec), used to determine how many times we try
# SIGTERM and an upper limit on how long this function should try and
# stop the process. Defaults to 15.
@@ -425,16 +425,20 @@ proc_stop()
local i
local pid
local pidfile
- # check if provide just a number
- echo "${pid_param}" | egrep -q '^[0-9]+$'
- if [ $? -eq 0 ]; then
- pid="${pid_param}"
- elif [ -e "${pid_param}" ]; then # check if passed in a pid file
- pidfile="${pid_param}"
- pid=$(cat "${pidfile}" 2>/dev/null | tr -s " " "\n" | sort -u)
- else
- ocf_log warn "${LH} pid param ${pid_param} is not a file or a number, try match by ${service_name}"
+ if [ "${pid_param}" = "none" ] ; then
pid="none"
+ else
+ # check if provide just a number
+ echo "${pid_param}" | egrep -q '^[0-9]+$'
+ if [ $? -eq 0 ]; then
+ pid="${pid_param}"
+ elif [ -e "${pid_param}" ]; then # check if passed in a pid file
+ pidfile="${pid_param}"
+ pid=$(cat "${pidfile}" 2>/dev/null | tr -s " " "\n" | sort -u)
+ else
+ ocf_log warn "${LH} pid param ${pid_param} is not a file or a number, try match by ${service_name}"
+ pid="none"
+ fi
fi
# number of times to try a SIGTERM is (timeout - 5 seconds) / 2 seconds
local stop_count=$(( ($timeout-5)/2 ))
@@ -790,10 +794,14 @@ update_cookie() {
return $OCF_SUCCESS
}
-# Stop rmq beam process by pid or rabbit node name match. Returns SUCCESS/ERROR
+# Stop rmq beam process by pid and by rabbit node name match. Returns SUCCESS/ERROR
kill_rmq_and_remove_pid() {
local LH="${LL} kill_rmq_and_remove_pid():"
+ # Stop the rabbitmq-server by its pidfile, use the name matching as a fallback,
+ # and ignore the exit code
proc_stop "${OCF_RESKEY_pid_file}" "beam.*${RABBITMQ_NODENAME}" "${OCF_RESKEY_stop_time}"
+ # Ensure beam.smp is also stopped by matching the rabbit node name
+ proc_stop none "beam.*${RABBITMQ_NODENAME}" "${OCF_RESKEY_stop_time}"
if [ $? -eq 0 ] ; then
return $OCF_SUCCESS
else
@@ -967,9 +975,11 @@ stop_server_process() {
[ $? -eq 0 ] && ocf_log info "${LH} RMQ-server process (PID=${pid}) stopped succesfully."
fi
- if [ -f ${OCF_RESKEY_pid_file} ] ; then
- # Ensure there is no beam process and pidfile left
- ocf_log warn "${LH} The pidfile still exists, forcing the RMQ-server cleanup"
+ # Ensure there is no beam process and pidfile left
+ pgrep -f "beam.*${RABBITMQ_NODENAME}" > /dev/null
+ rc=$?
+ if [ -f ${OCF_RESKEY_pid_file} -o $rc -eq 0 ] ; then
+ ocf_log warn "${LH} The pidfile or beam's still exist, forcing the RMQ-server cleanup"
kill_rmq_and_remove_pid
fi
@@ -1399,34 +1409,32 @@ get_monitor() {
if [ $rabbit_running -eq $OCF_SUCCESS ]
then
ocf_log info "${LH} rabbit app is running. checking if we are the part of healthy cluster"
- rc_check=$OCF_ERR_GENERIC
- nodelist=$(get_alive_pacemaker_nodes_but)
- for node in $nodelist
- do
- status_master=1
- # Do not refetch the master status for *this* node as we know it already
- if [ $rc -ne $OCF_RUNNING_MASTER ] ; then
+
+ if [ $rc -eq $OCF_RUNNING_MASTER ] ; then
+ # The master is always running inside of its cluster
+ ocf_log info "${LH} rabbit app is running and is master of cluster"
+ rc_check=$OCF_SUCCESS
+ else
+ rc_check=$OCF_ERR_GENERIC
+ nodelist=$(get_alive_pacemaker_nodes_but)
+ for node in $nodelist
+ do
ocf_log info "${LH} rabbit app is running. looking for master on $node"
is_master $node
status_master=$?
ocf_log info "${LH} fetched master attribute for $node. attr value is ${status_master}"
- else
- # The master is always running inside of its cluster
- ocf_log info "${LH} rabbit app is running and is member of healthy cluster"
- rc_check=$OCF_SUCCESS
- break
- fi
- if [ $status_master -eq 0 ] ; then
- ocf_log info "${LH} rabbit app is running. master is $node"
- if get_running_nodes | grep -q $(rabbit_node_name $node)
- then
- ocf_log info "${LH} rabbit app is running and is member of healthy cluster"
- rc_check=$OCF_SUCCESS
- break
+ if [ $status_master -eq 0 ] ; then
+ ocf_log info "${LH} rabbit app is running. master is $node"
+ if get_running_nodes | grep -q $(rabbit_node_name $node)
+ then
+ ocf_log info "${LH} rabbit app is running and is member of healthy cluster"
+ rc_check=$OCF_SUCCESS
+ break
+ fi
fi
- fi
- done
- [ $rc_check -eq $OCF_ERR_GENERIC ] && ocf_log err "${LH} rabbit node is running out of the cluster"
+ done
+ [ $rc_check -eq $OCF_ERR_GENERIC ] && ocf_log err "${LH} rabbit node is running out of the cluster"
+ fi
else
if [ "$OCF_CHECK_LEVEL" -gt 20 ]; then
ocf_log info "${LH} rabbit app is not running. checking if there is a master"