summaryrefslogtreecommitdiff
path: root/scripts
diff options
context:
space:
mode:
author Dmitry Mescheryakov <dmescheryakov@mirantis.com> 2016-08-22 14:19:21 +0300
committer Dmitry Mescheryakov <dmescheryakov@mirantis.com> 2016-08-22 14:27:22 +0300
commit 5a6f61c423b19b33e36f0be0f995a1636b310873 (patch)
tree a2918eaaa40d892fa2dc7e8ba20feb6550a5096d /scripts
parent d4020f5f50f118f870ead1d1180f79e75032f6f7 (diff)
download rabbitmq-server-git-5a6f61c423b19b33e36f0be0f995a1636b310873.tar.gz
[OCF HA] Rank master score based on start time
Right now we assign 1000 to the oldest nodes and 1 to the others. That creates a problem when the Master restarts and no node is promoted until that node comes back. In that case the returned node will have a score of 1, like all the other slaves, and Pacemaker may select it for promotion again. The node is clean and empty, and afterwards the other slaves join it, wiping their data as well. As a result, we lose all the messages. The new algorithm actually ranks the nodes instead of just selecting the oldest one. It also maintains the invariant that if node A started later than node B, then node A's score must be smaller than that of node B. As a result, a freshly started node has no chance of being selected in preference to an older node. If several nodes start simultaneously, an older node among them might temporarily receive a lower score than a younger one, but that is negligible. Also remove any action on demote or demote notification - all of these duplicate actions done in stop or stop notification. With these removed, changing the master on a running cluster does not affect the RabbitMQ cluster in any way - we just declare another node master and that is it. This is important for the current change because the master score might change after the initial cluster start-up, causing master migration from one node to another. This fix is a prerequisite for the fix to Fuel bugs https://bugs.launchpad.net/fuel/+bug/1559136 https://bugs.launchpad.net/mos/+bug/1561894
Diffstat (limited to 'scripts')
-rwxr-xr-x scripts/rabbitmq-server-ha.ocf 214
1 files changed, 48 insertions, 166 deletions
diff --git a/scripts/rabbitmq-server-ha.ocf b/scripts/rabbitmq-server-ha.ocf
index 84baaba825..6a9e448853 100755
--- a/scripts/rabbitmq-server-ha.ocf
+++ b/scripts/rabbitmq-server-ha.ocf
@@ -319,6 +319,11 @@ $EXTENDED_OCF_PARAMS
END
}
+
+MIN_MASTER_SCORE=100
+BEST_MASTER_SCORE=1000
+
+
#######################################################################
# Functions invoked by resource manager actions
@@ -571,17 +576,21 @@ my_host() {
return $rc
}
-srv_uptime() {
- local stime
- stime=$( crm_attribute -N $THIS_PCMK_NODE -l reboot --name 'rabbit-start-time' --query 2>/dev/null | awk '{print $3}' | awk -F "=" '{print $2}' | sed -e '/(null)/d' )
-
- if [ -z "${stime}" -o "${stime}" = "(null)" ] ; then
- echo 0
- else
- echo $(( $(now) - ${stime} ))
+get_integer_node_attr() {
+ local value
+ value=$(crm_attribute -N $1 -l reboot --name "$2" --query 2>/dev/null | awk '{ split($3, vals, "="); if (vals[2] != "(null)") print vals[2] }')
+ if [ $? -ne 0 -o -z "$value" ] ; then
+ value=0
fi
+ echo $value
+}
- return $OCF_SUCCESS
+get_node_start_time() {
+ get_integer_node_attr $1 'rabbit-start-time'
+}
+
+get_node_master_score() {
+ get_integer_node_attr $1 'master-p_rabbitmq-server'
}
# Return either rabbit node name as FQDN or shortname, depends on the OCF_RESKEY_use_fqdn.
@@ -1245,7 +1254,7 @@ start_rmq_server_app() {
rc=$?
if [ $rc -eq $OCF_SUCCESS ] ; then
# rabbitmq-server started successfuly as master of cluster
- master_score 1 # minimal positive master-score for this node.
+ master_score $MIN_MASTER_SCORE
stop_rmq_server_app
rc=$?
if [ $rc -ne 0 ] ; then
@@ -1269,7 +1278,7 @@ start_rmq_server_app() {
if [ $rc -eq $OCF_SUCCESS ]; then
ocf_log info "${LH} RMQ-server app Mnesia cleaned successfully."
rc=$OCF_SUCCESS
- master_score 1
+ master_score $MIN_MASTER_SCORE
break
else
ocf_log err "${LH} RMQ-server app can't be stopped during Mnesia cleaning. Beam will be killed."
@@ -1400,10 +1409,6 @@ get_monitor() {
local rabbit_running
local name
local node
- local nodelist
- local max
- local our_uptime
- local node_uptime
local node_start_time
ocf_log info "${LH} CHECK LEVEL IS: ${OCF_CHECK_LEVEL}"
@@ -1484,45 +1489,37 @@ get_monitor() {
ocf_log info "${LH} ensuring this slave does not get promoted."
master_score 0
return $OCF_ERR_GENERIC
- elif [ $rc -ne $OCF_RUNNING_MASTER ] ; then
- ocf_log info "${LH} preparing to update master score for node"
- our_uptime=$(srv_uptime)
- nodelist=$(get_alive_pacemaker_nodes_but $THIS_PCMK_NODE)
- max=1
- for node in $nodelist
+ fi
+
+ # Recounting our master score
+ ocf_log info "${LH} preparing to update master score for node"
+ local our_start_time
+ local new_score
+ local node_start_time
+ local node_score
+
+ our_start_time=$(get_node_start_time $THIS_PCMK_NODE)
+
+ if [ $our_start_time -eq 0 ]; then
+ new_score=$MIN_MASTER_SCORE
+ else
+ new_score=$BEST_MASTER_SCORE
+ for node in $(get_alive_pacemaker_nodes_but $THIS_PCMK_NODE)
do
- node_start_time=`crm_attribute -N $node -l reboot --name 'rabbit-start-time' --query 2>/dev/null | awk '{print $3}' | awk -F "=" '{print $2}' | sed -e '/(null)/d'`
- if [ -z "${node_start_time}" -o "${node_start_time}" = "(null)" ] ; then
- node_uptime=0
- else
- node_uptime=$(( $(now) - ${node_start_time} ))
- fi
- ocf_log info "${LH} comparing our uptime (${our_uptime}) with $node (${node_uptime})"
- if [ ${our_uptime} -lt ${node_uptime} ]
- then
- max=1
- break
- else
- # When uptime is equal, accept the existing master - if any - as the oldest node
- is_master $node
- status_master=$?
- if [ $status_master -eq 0 ] ; then
- max=1
- ocf_log info "${LH} Found the oldest master node $node with uptime (${node_uptime})"
- break
- else
- max=0
- fi
+ node_start_time=$(get_node_start_time $node)
+ node_score=$(get_node_master_score $node)
+
+ ocf_log info "${LH} comparing us (start time: $our_start_time, score: $new_score) with $node (start time: $node_start_time, score: $node_score)"
+ if [ $node_start_time -ne 0 -a $node_score -ne 0 -a $node_start_time -lt $our_start_time ]; then
+ new_score=$((node_score - 10 < new_score ? node_score - 10 : new_score ))
fi
done
+ fi
-
- if [ $max -eq 0 ]
- then
- ocf_log info "${LH} we are the oldest node"
- master_score 1000
- fi
+ if [ "$new_score" -ne "$(get_node_master_score $THIS_PCMK_NODE)" ]; then
+ master_score $new_score
fi
+ ocf_log info "${LH} our start time is $our_start_time and score is $new_score"
# Skip all other checks if rabbit app is not running
if [ $rabbit_running -ne $OCF_SUCCESS ]; then
@@ -1929,28 +1926,6 @@ action_notify() {
echo "$d [notify] ${OCF_RESKEY_CRM_meta_notify_type}-${OCF_RESKEY_CRM_meta_notify_operation} promote='${OCF_RESKEY_CRM_meta_notify_promote_uname}' demote='${OCF_RESKEY_CRM_meta_notify_demote_uname}' master='${OCF_RESKEY_CRM_meta_notify_master_uname}' slave='${OCF_RESKEY_CRM_meta_notify_slave_uname}' start='${OCF_RESKEY_CRM_meta_notify_start_uname}' stop='${OCF_RESKEY_CRM_meta_notify_stop_uname}' active='${OCF_RESKEY_CRM_meta_notify_active_uname}' inactive='${OCF_RESKEY_CRM_meta_notify_inactive_uname}'" >> /tmp/rmq-ocf.log
fi
- if [ "${OCF_RESKEY_CRM_meta_notify_type}" = 'pre' ] ; then
- # PRE- anything notify section
- case "$OCF_RESKEY_CRM_meta_notify_operation" in
- promote)
- ocf_log info "${LH} pre-promote begin."
- my_host "$OCF_RESKEY_CRM_meta_notify_promote_uname"
- rc=$?
- if [ $rc -eq $OCF_SUCCESS ] ; then
- nodelist=$(get_all_pacemaker_nodes)
- for i in $nodelist
- do
- ocf_log info "${LH} Deleting master attribute for node ${i}"
- ocf_run crm_attribute -N $i -l reboot --name 'rabbit-master' --delete
- done
- ocf_log info "${LH} pre-promote end."
- fi
- ;;
- *)
- ;;
- esac
- fi
-
if [ "${OCF_RESKEY_CRM_meta_notify_type}" = 'post' ] ; then
# POST- anything notify section
case "$OCF_RESKEY_CRM_meta_notify_operation" in
@@ -2069,42 +2044,6 @@ action_notify() {
# always returns OCF_SUCCESS
ocf_log info "${LH} post-stop end."
;;
- demote)
- # if rabbitmq-server stops on any another node, we should remove it from cluster (as ordinary operation)
- ocf_log info "${LH} post-demote begin."
- # Report not running, if the list of nodes being demoted reported empty
- if [ -z "${OCF_RESKEY_CRM_meta_notify_demote_uname}" ] ; then
- ocf_log warn "${LH} there are no nodes being demoted reported on post-demote. The resource will be restarted."
- ocf_log info "${LH} post-demote end."
- return $OCF_ERR_GENERIC
- fi
- my_host "${OCF_RESKEY_CRM_meta_notify_demote_uname}"
- rc=$?
- if [ $rc -ne $OCF_SUCCESS ] ; then
- # On ohter nodes processing the post-demote, make sure the demoted node will be forgotten
- unjoin_nodes_from_cluster "${OCF_RESKEY_CRM_meta_notify_demote_uname}"
- else
- # Wait for synced state first
- ocf_log info "${LH} waiting $((OCF_RESKEY_stop_time/2)) to sync"
- wait_sync $((OCF_RESKEY_stop_time/2))
- # On the nodes being demoted, reset the master score
- ocf_log info "${LH} resetting the master score."
- master_score 0
- ocf_log info "${LH} Deleting start time attribute"
- ocf_run crm_attribute -N $THIS_PCMK_NODE -l reboot --name 'rabbit-start-time' --delete
- ocf_log info "${LH} Deleting master attribute"
- ocf_run crm_attribute -N $THIS_PCMK_NODE -l reboot --name 'rabbit-master' --delete
- ocf_log info "${LH} master was demoted. stopping RabbitMQ app."
- stop_rmq_server_app
- rc2=$?
- if [ $rc2 -ne $OCF_SUCCESS ] ; then
- ocf_log err "${LH} RMQ-server app can't be stopped on post-demote. Master resource is failed"
- ocf_log info "${LH} post-demote end."
- exit $OCF_FAILED_MASTER
- fi
- fi
- ocf_log info "${LH} post-demote end."
- ;;
*) ;;
esac
fi
@@ -2211,68 +2150,11 @@ action_promote() {
action_demote() {
- local rc=$OCF_ERR_GENERIC
local LH="${LL} demote:"
-
- if [ "${OCF_RESKEY_debug}" = 'true' ] ; then
- d=`date '+%Y%m%d %H:%M:%S'`
- echo $d >> /tmp/rmq-demote.log
- env >> /tmp/rmq-demote.log
- echo "$d [demote] start='${OCF_RESKEY_CRM_meta_notify_start_uname}' stop='${OCF_RESKEY_CRM_meta_notify_stop_uname}' active='${OCF_RESKEY_CRM_meta_notify_active_uname}' inactive='${OCF_RESKEY_CRM_meta_notify_inactive_uname}'" >> /tmp/rmq-ocf.log
-
- fi
-
ocf_log info "${LH} action begin."
-
- get_monitor
- rc=$?
- case "$rc" in
- "$OCF_RUNNING_MASTER")
- # Running as master. Normal, expected behavior.
- ocf_log warn "${LH} Resource is currently running as Master"
- ocf_log info "${LH} Deleting master attribute"
- ocf_run crm_attribute -N $THIS_PCMK_NODE -l reboot --name 'rabbit-master' --delete
- ocf_log info "${LH} Deleting start timestamp"
- ocf_run crm_attribute -N $THIS_PCMK_NODE -l reboot --name 'rabbit-start-time' --delete
-
- # Wait for synced state first
- ocf_log info "${LH} waiting $((OCF_RESKEY_stop_time/2)) to sync"
- wait_sync $((OCF_RESKEY_stop_time/2))
-
- stop_rmq_server_app
- rc=$?
- ;;
- "$OCF_SUCCESS")
- # Alread running as slave. Nothing to do.
- ocf_log warn "${LH} Resource is currently running as Slave"
- rc=$OCF_SUCCESS
- ;;
- "$OCF_FAILED_MASTER")
- # Master failed and being demoted.
- ocf_log err "${LH} Demoting of a failed Master."
- ocf_log info "${LH} action end."
- exit $OCF_FAILED_MASTER
- ;;
- "$OCF_NOT_RUNNING")
- ocf_log warn "${LH} Try to demote currently not running resource. Nothing to do."
- rc=$OCF_SUCCESS
- ;;
- "$OCF_ERR_GENERIC")
- ocf_log err "${LH} Error while demote. Stopping resource."
- action_stop
- rc=$?
- ;;
- *)
- # Failed resource. Let the cluster manager recover.
- ocf_log err "${LH} Unexpected error, cannot demote"
- ocf_log info "${LH} action end."
- exit $rc
- ;;
- esac
-
- # transform master RMQ-server to slave
+ ocf_run crm_attribute -N $THIS_PCMK_NODE -l reboot --name 'rabbit-master' --delete
ocf_log info "${LH} action end."
- return $rc
+ return $OCF_SUCCESS
}
#######################################################################