summary | refs | log | tree | commit | diff
path: root/scripts/rabbitmq-server-ha.ocf
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/rabbitmq-server-ha.ocf')
-rwxr-xr-x scripts/rabbitmq-server-ha.ocf 214
1 files changed, 48 insertions, 166 deletions
diff --git a/scripts/rabbitmq-server-ha.ocf b/scripts/rabbitmq-server-ha.ocf
index 84baaba825..6a9e448853 100755
--- a/scripts/rabbitmq-server-ha.ocf
+++ b/scripts/rabbitmq-server-ha.ocf
@@ -319,6 +319,11 @@ $EXTENDED_OCF_PARAMS
END
}
+# Master score bounds: MIN is the minimal positive score for a node; BEST is the score kept by a node with no older scored peer.
+MIN_MASTER_SCORE=100
+BEST_MASTER_SCORE=1000
+
+
#######################################################################
# Functions invoked by resource manager actions
@@ -571,17 +576,21 @@ my_host() {
return $rc
}
-srv_uptime() {
- local stime
- stime=$( crm_attribute -N $THIS_PCMK_NODE -l reboot --name 'rabbit-start-time' --query 2>/dev/null | awk '{print $3}' | awk -F "=" '{print $2}' | sed -e '/(null)/d' )
-
- if [ -z "${stime}" -o "${stime}" = "(null)" ] ; then
- echo 0
- else
- echo $(( $(now) - ${stime} ))
+get_integer_node_attr() { # usage: get_integer_node_attr <node> <attr>; echoes the reboot-lifetime attribute as an integer, 0 if unset or invalid
+ local value
+ value=$(crm_attribute -N "$1" -l reboot --name "$2" --query 2>/dev/null | awk '{ split($3, vals, "="); if (vals[2] != "(null)") print vals[2] }')
+ case "$value" in
+ ''|-|*[!0-9-]*|?*-*) value=0 ;; # empty (unset, "(null)" or query failed) or not an optionally-signed integer
+ esac # the pipeline's $? is only awk's status, so the text itself is validated instead
+ echo "$value"
+}
- return $OCF_SUCCESS
+get_node_start_time() { # $1: pacemaker node name; echoes its 'rabbit-start-time' attribute, or 0 when it is not set
+ get_integer_node_attr "$1" 'rabbit-start-time'
+}
+
+get_node_master_score() { # $1: pacemaker node name; echoes its 'master-p_rabbitmq-server' attribute, or 0 when it is not set
+ get_integer_node_attr "$1" 'master-p_rabbitmq-server'
}
# Return either rabbit node name as FQDN or shortname, depends on the OCF_RESKEY_use_fqdn.
@@ -1245,7 +1254,7 @@ start_rmq_server_app() {
rc=$?
if [ $rc -eq $OCF_SUCCESS ] ; then
# rabbitmq-server started successfuly as master of cluster
- master_score 1 # minimal positive master-score for this node.
+ master_score $MIN_MASTER_SCORE
stop_rmq_server_app
rc=$?
if [ $rc -ne 0 ] ; then
@@ -1269,7 +1278,7 @@ start_rmq_server_app() {
if [ $rc -eq $OCF_SUCCESS ]; then
ocf_log info "${LH} RMQ-server app Mnesia cleaned successfully."
rc=$OCF_SUCCESS
- master_score 1
+ master_score $MIN_MASTER_SCORE
break
else
ocf_log err "${LH} RMQ-server app can't be stopped during Mnesia cleaning. Beam will be killed."
@@ -1400,10 +1409,6 @@ get_monitor() {
local rabbit_running
local name
local node
- local nodelist
- local max
- local our_uptime
- local node_uptime
local node_start_time
ocf_log info "${LH} CHECK LEVEL IS: ${OCF_CHECK_LEVEL}"
@@ -1484,45 +1489,37 @@ get_monitor() {
ocf_log info "${LH} ensuring this slave does not get promoted."
master_score 0
return $OCF_ERR_GENERIC
- elif [ $rc -ne $OCF_RUNNING_MASTER ] ; then
- ocf_log info "${LH} preparing to update master score for node"
- our_uptime=$(srv_uptime)
- nodelist=$(get_alive_pacemaker_nodes_but $THIS_PCMK_NODE)
- max=1
- for node in $nodelist
+ fi
+
+ # Recounting our master score
+ ocf_log info "${LH} preparing to update master score for node"
+ local our_start_time
+ local new_score
+ local node_start_time
+ local node_score
+
+ our_start_time=$(get_node_start_time $THIS_PCMK_NODE)
+
+ if [ $our_start_time -eq 0 ]; then
+ new_score=$MIN_MASTER_SCORE
+ else
+ new_score=$BEST_MASTER_SCORE
+ for node in $(get_alive_pacemaker_nodes_but $THIS_PCMK_NODE)
do
- node_start_time=`crm_attribute -N $node -l reboot --name 'rabbit-start-time' --query 2>/dev/null | awk '{print $3}' | awk -F "=" '{print $2}' | sed -e '/(null)/d'`
- if [ -z "${node_start_time}" -o "${node_start_time}" = "(null)" ] ; then
- node_uptime=0
- else
- node_uptime=$(( $(now) - ${node_start_time} ))
- fi
- ocf_log info "${LH} comparing our uptime (${our_uptime}) with $node (${node_uptime})"
- if [ ${our_uptime} -lt ${node_uptime} ]
- then
- max=1
- break
- else
- # When uptime is equal, accept the existing master - if any - as the oldest node
- is_master $node
- status_master=$?
- if [ $status_master -eq 0 ] ; then
- max=1
- ocf_log info "${LH} Found the oldest master node $node with uptime (${node_uptime})"
- break
- else
- max=0
- fi
+ node_start_time=$(get_node_start_time $node)
+ node_score=$(get_node_master_score $node)
+
+ ocf_log info "${LH} comparing us (start time: $our_start_time, score: $new_score) with $node (start time: $node_start_time, score: $node_score)"
+ if [ $node_start_time -ne 0 -a $node_score -ne 0 -a $node_start_time -lt $our_start_time ]; then
+ new_score=$((node_score - 10 < new_score ? node_score - 10 : new_score ))
fi
done
+ fi
-
- if [ $max -eq 0 ]
- then
- ocf_log info "${LH} we are the oldest node"
- master_score 1000
- fi
+ if [ "$new_score" -ne "$(get_node_master_score $THIS_PCMK_NODE)" ]; then
+ master_score $new_score
fi
+ ocf_log info "${LH} our start time is $our_start_time and score is $new_score"
# Skip all other checks if rabbit app is not running
if [ $rabbit_running -ne $OCF_SUCCESS ]; then
@@ -1929,28 +1926,6 @@ action_notify() {
echo "$d [notify] ${OCF_RESKEY_CRM_meta_notify_type}-${OCF_RESKEY_CRM_meta_notify_operation} promote='${OCF_RESKEY_CRM_meta_notify_promote_uname}' demote='${OCF_RESKEY_CRM_meta_notify_demote_uname}' master='${OCF_RESKEY_CRM_meta_notify_master_uname}' slave='${OCF_RESKEY_CRM_meta_notify_slave_uname}' start='${OCF_RESKEY_CRM_meta_notify_start_uname}' stop='${OCF_RESKEY_CRM_meta_notify_stop_uname}' active='${OCF_RESKEY_CRM_meta_notify_active_uname}' inactive='${OCF_RESKEY_CRM_meta_notify_inactive_uname}'" >> /tmp/rmq-ocf.log
fi
- if [ "${OCF_RESKEY_CRM_meta_notify_type}" = 'pre' ] ; then
- # PRE- anything notify section
- case "$OCF_RESKEY_CRM_meta_notify_operation" in
- promote)
- ocf_log info "${LH} pre-promote begin."
- my_host "$OCF_RESKEY_CRM_meta_notify_promote_uname"
- rc=$?
- if [ $rc -eq $OCF_SUCCESS ] ; then
- nodelist=$(get_all_pacemaker_nodes)
- for i in $nodelist
- do
- ocf_log info "${LH} Deleting master attribute for node ${i}"
- ocf_run crm_attribute -N $i -l reboot --name 'rabbit-master' --delete
- done
- ocf_log info "${LH} pre-promote end."
- fi
- ;;
- *)
- ;;
- esac
- fi
-
if [ "${OCF_RESKEY_CRM_meta_notify_type}" = 'post' ] ; then
# POST- anything notify section
case "$OCF_RESKEY_CRM_meta_notify_operation" in
@@ -2069,42 +2044,6 @@ action_notify() {
# always returns OCF_SUCCESS
ocf_log info "${LH} post-stop end."
;;
- demote)
- # if rabbitmq-server stops on any another node, we should remove it from cluster (as ordinary operation)
- ocf_log info "${LH} post-demote begin."
- # Report not running, if the list of nodes being demoted reported empty
- if [ -z "${OCF_RESKEY_CRM_meta_notify_demote_uname}" ] ; then
- ocf_log warn "${LH} there are no nodes being demoted reported on post-demote. The resource will be restarted."
- ocf_log info "${LH} post-demote end."
- return $OCF_ERR_GENERIC
- fi
- my_host "${OCF_RESKEY_CRM_meta_notify_demote_uname}"
- rc=$?
- if [ $rc -ne $OCF_SUCCESS ] ; then
- # On ohter nodes processing the post-demote, make sure the demoted node will be forgotten
- unjoin_nodes_from_cluster "${OCF_RESKEY_CRM_meta_notify_demote_uname}"
- else
- # Wait for synced state first
- ocf_log info "${LH} waiting $((OCF_RESKEY_stop_time/2)) to sync"
- wait_sync $((OCF_RESKEY_stop_time/2))
- # On the nodes being demoted, reset the master score
- ocf_log info "${LH} resetting the master score."
- master_score 0
- ocf_log info "${LH} Deleting start time attribute"
- ocf_run crm_attribute -N $THIS_PCMK_NODE -l reboot --name 'rabbit-start-time' --delete
- ocf_log info "${LH} Deleting master attribute"
- ocf_run crm_attribute -N $THIS_PCMK_NODE -l reboot --name 'rabbit-master' --delete
- ocf_log info "${LH} master was demoted. stopping RabbitMQ app."
- stop_rmq_server_app
- rc2=$?
- if [ $rc2 -ne $OCF_SUCCESS ] ; then
- ocf_log err "${LH} RMQ-server app can't be stopped on post-demote. Master resource is failed"
- ocf_log info "${LH} post-demote end."
- exit $OCF_FAILED_MASTER
- fi
- fi
- ocf_log info "${LH} post-demote end."
- ;;
*) ;;
esac
fi
@@ -2211,68 +2150,11 @@ action_promote() {
action_demote() {
- local rc=$OCF_ERR_GENERIC
local LH="${LL} demote:"
-
- if [ "${OCF_RESKEY_debug}" = 'true' ] ; then
- d=`date '+%Y%m%d %H:%M:%S'`
- echo $d >> /tmp/rmq-demote.log
- env >> /tmp/rmq-demote.log
- echo "$d [demote] start='${OCF_RESKEY_CRM_meta_notify_start_uname}' stop='${OCF_RESKEY_CRM_meta_notify_stop_uname}' active='${OCF_RESKEY_CRM_meta_notify_active_uname}' inactive='${OCF_RESKEY_CRM_meta_notify_inactive_uname}'" >> /tmp/rmq-ocf.log
-
- fi
-
ocf_log info "${LH} action begin."
-
- get_monitor
- rc=$?
- case "$rc" in
- "$OCF_RUNNING_MASTER")
- # Running as master. Normal, expected behavior.
- ocf_log warn "${LH} Resource is currently running as Master"
- ocf_log info "${LH} Deleting master attribute"
- ocf_run crm_attribute -N $THIS_PCMK_NODE -l reboot --name 'rabbit-master' --delete
- ocf_log info "${LH} Deleting start timestamp"
- ocf_run crm_attribute -N $THIS_PCMK_NODE -l reboot --name 'rabbit-start-time' --delete
-
- # Wait for synced state first
- ocf_log info "${LH} waiting $((OCF_RESKEY_stop_time/2)) to sync"
- wait_sync $((OCF_RESKEY_stop_time/2))
-
- stop_rmq_server_app
- rc=$?
- ;;
- "$OCF_SUCCESS")
- # Alread running as slave. Nothing to do.
- ocf_log warn "${LH} Resource is currently running as Slave"
- rc=$OCF_SUCCESS
- ;;
- "$OCF_FAILED_MASTER")
- # Master failed and being demoted.
- ocf_log err "${LH} Demoting of a failed Master."
- ocf_log info "${LH} action end."
- exit $OCF_FAILED_MASTER
- ;;
- "$OCF_NOT_RUNNING")
- ocf_log warn "${LH} Try to demote currently not running resource. Nothing to do."
- rc=$OCF_SUCCESS
- ;;
- "$OCF_ERR_GENERIC")
- ocf_log err "${LH} Error while demote. Stopping resource."
- action_stop
- rc=$?
- ;;
- *)
- # Failed resource. Let the cluster manager recover.
- ocf_log err "${LH} Unexpected error, cannot demote"
- ocf_log info "${LH} action end."
- exit $rc
- ;;
- esac
-
- # transform master RMQ-server to slave
+ ocf_run crm_attribute -N "$THIS_PCMK_NODE" -l reboot --name 'rabbit-master' --delete # demote only drops this node's 'rabbit-master' attribute
ocf_log info "${LH} action end."
- return $rc
+ return $OCF_SUCCESS # unconditional: the result of the attribute delete is not checked
}
#######################################################################