diff options
| author | Dmitry Mescheryakov <dmescheryakov@mirantis.com> | 2016-08-22 14:19:21 +0300 |
|---|---|---|
| committer | Dmitry Mescheryakov <dmescheryakov@mirantis.com> | 2016-08-22 14:27:22 +0300 |
| commit | 5a6f61c423b19b33e36f0be0f995a1636b310873 (patch) | |
| tree | a2918eaaa40d892fa2dc7e8ba20feb6550a5096d /scripts | |
| parent | d4020f5f50f118f870ead1d1180f79e75032f6f7 (diff) | |
| download | rabbitmq-server-git-5a6f61c423b19b33e36f0be0f995a1636b310873.tar.gz | |
[OCF HA] Rank master score based on start time
Right now we assign 1000 to the oldest nodes and 1 to the others. That
creates a problem when the Master restarts and no node is promoted until
that node comes back. In that case the returned node will have a score
of 1, like all the other slaves, and Pacemaker will choose to promote it
again. The node is clean and empty, and afterwards the other slaves join
it, wiping their data as well. As a result, we lose all the messages.
The new algorithm actually ranks nodes, not just selects the oldest
one. It also maintains the invariant that if node A started later
than node B, then node A's score must be smaller than that of
node B. As a result, a freshly started node has no chance of being
selected in preference to an older node. If several nodes start
simultaneously, an older node among them might temporarily receive
a lower score than a younger one, but that is negligible.
Also remove any action on demote or demote notification - all of
these duplicate actions done in stop or stop notification. With these
removed, changing master on a running cluster does not affect the RabbitMQ
cluster in any way - we just declare another node master and that is
it. This is important for the current change because the master score might
change after initial cluster start-up, causing master migration from
one node to another.
This fix is a prerequisite for the fixes to the following Fuel bugs:
https://bugs.launchpad.net/fuel/+bug/1559136
https://bugs.launchpad.net/mos/+bug/1561894
Diffstat (limited to 'scripts')
| -rwxr-xr-x | scripts/rabbitmq-server-ha.ocf | 214 |
1 files changed, 48 insertions, 166 deletions
diff --git a/scripts/rabbitmq-server-ha.ocf b/scripts/rabbitmq-server-ha.ocf index 84baaba825..6a9e448853 100755 --- a/scripts/rabbitmq-server-ha.ocf +++ b/scripts/rabbitmq-server-ha.ocf @@ -319,6 +319,11 @@ $EXTENDED_OCF_PARAMS END } + +MIN_MASTER_SCORE=100 +BEST_MASTER_SCORE=1000 + + ####################################################################### # Functions invoked by resource manager actions @@ -571,17 +576,21 @@ my_host() { return $rc } -srv_uptime() { - local stime - stime=$( crm_attribute -N $THIS_PCMK_NODE -l reboot --name 'rabbit-start-time' --query 2>/dev/null | awk '{print $3}' | awk -F "=" '{print $2}' | sed -e '/(null)/d' ) - - if [ -z "${stime}" -o "${stime}" = "(null)" ] ; then - echo 0 - else - echo $(( $(now) - ${stime} )) +get_integer_node_attr() { + local value + value=$(crm_attribute -N $1 -l reboot --name "$2" --query 2>/dev/null | awk '{ split($3, vals, "="); if (vals[2] != "(null)") print vals[2] }') + if [ $? -ne 0 -o -z "$value" ] ; then + value=0 fi + echo $value +} - return $OCF_SUCCESS +get_node_start_time() { + get_integer_node_attr $1 'rabbit-start-time' +} + +get_node_master_score() { + get_integer_node_attr $1 'master-p_rabbitmq-server' } # Return either rabbit node name as FQDN or shortname, depends on the OCF_RESKEY_use_fqdn. @@ -1245,7 +1254,7 @@ start_rmq_server_app() { rc=$? if [ $rc -eq $OCF_SUCCESS ] ; then # rabbitmq-server started successfuly as master of cluster - master_score 1 # minimal positive master-score for this node. + master_score $MIN_MASTER_SCORE stop_rmq_server_app rc=$? if [ $rc -ne 0 ] ; then @@ -1269,7 +1278,7 @@ start_rmq_server_app() { if [ $rc -eq $OCF_SUCCESS ]; then ocf_log info "${LH} RMQ-server app Mnesia cleaned successfully." rc=$OCF_SUCCESS - master_score 1 + master_score $MIN_MASTER_SCORE break else ocf_log err "${LH} RMQ-server app can't be stopped during Mnesia cleaning. Beam will be killed." 
@@ -1400,10 +1409,6 @@ get_monitor() { local rabbit_running local name local node - local nodelist - local max - local our_uptime - local node_uptime local node_start_time ocf_log info "${LH} CHECK LEVEL IS: ${OCF_CHECK_LEVEL}" @@ -1484,45 +1489,37 @@ get_monitor() { ocf_log info "${LH} ensuring this slave does not get promoted." master_score 0 return $OCF_ERR_GENERIC - elif [ $rc -ne $OCF_RUNNING_MASTER ] ; then - ocf_log info "${LH} preparing to update master score for node" - our_uptime=$(srv_uptime) - nodelist=$(get_alive_pacemaker_nodes_but $THIS_PCMK_NODE) - max=1 - for node in $nodelist + fi + + # Recounting our master score + ocf_log info "${LH} preparing to update master score for node" + local our_start_time + local new_score + local node_start_time + local node_score + + our_start_time=$(get_node_start_time $THIS_PCMK_NODE) + + if [ $our_start_time -eq 0 ]; then + new_score=$MIN_MASTER_SCORE + else + new_score=$BEST_MASTER_SCORE + for node in $(get_alive_pacemaker_nodes_but $THIS_PCMK_NODE) do - node_start_time=`crm_attribute -N $node -l reboot --name 'rabbit-start-time' --query 2>/dev/null | awk '{print $3}' | awk -F "=" '{print $2}' | sed -e '/(null)/d'` - if [ -z "${node_start_time}" -o "${node_start_time}" = "(null)" ] ; then - node_uptime=0 - else - node_uptime=$(( $(now) - ${node_start_time} )) - fi - ocf_log info "${LH} comparing our uptime (${our_uptime}) with $node (${node_uptime})" - if [ ${our_uptime} -lt ${node_uptime} ] - then - max=1 - break - else - # When uptime is equal, accept the existing master - if any - as the oldest node - is_master $node - status_master=$? 
- if [ $status_master -eq 0 ] ; then - max=1 - ocf_log info "${LH} Found the oldest master node $node with uptime (${node_uptime})" - break - else - max=0 - fi + node_start_time=$(get_node_start_time $node) + node_score=$(get_node_master_score $node) + + ocf_log info "${LH} comparing us (start time: $our_start_time, score: $new_score) with $node (start time: $node_start_time, score: $node_score)" + if [ $node_start_time -ne 0 -a $node_score -ne 0 -a $node_start_time -lt $our_start_time ]; then + new_score=$((node_score - 10 < new_score ? node_score - 10 : new_score )) fi done + fi - - if [ $max -eq 0 ] - then - ocf_log info "${LH} we are the oldest node" - master_score 1000 - fi + if [ "$new_score" -ne "$(get_node_master_score $THIS_PCMK_NODE)" ]; then + master_score $new_score fi + ocf_log info "${LH} our start time is $our_start_time and score is $new_score" # Skip all other checks if rabbit app is not running if [ $rabbit_running -ne $OCF_SUCCESS ]; then @@ -1929,28 +1926,6 @@ action_notify() { echo "$d [notify] ${OCF_RESKEY_CRM_meta_notify_type}-${OCF_RESKEY_CRM_meta_notify_operation} promote='${OCF_RESKEY_CRM_meta_notify_promote_uname}' demote='${OCF_RESKEY_CRM_meta_notify_demote_uname}' master='${OCF_RESKEY_CRM_meta_notify_master_uname}' slave='${OCF_RESKEY_CRM_meta_notify_slave_uname}' start='${OCF_RESKEY_CRM_meta_notify_start_uname}' stop='${OCF_RESKEY_CRM_meta_notify_stop_uname}' active='${OCF_RESKEY_CRM_meta_notify_active_uname}' inactive='${OCF_RESKEY_CRM_meta_notify_inactive_uname}'" >> /tmp/rmq-ocf.log fi - if [ "${OCF_RESKEY_CRM_meta_notify_type}" = 'pre' ] ; then - # PRE- anything notify section - case "$OCF_RESKEY_CRM_meta_notify_operation" in - promote) - ocf_log info "${LH} pre-promote begin." - my_host "$OCF_RESKEY_CRM_meta_notify_promote_uname" - rc=$? 
- if [ $rc -eq $OCF_SUCCESS ] ; then - nodelist=$(get_all_pacemaker_nodes) - for i in $nodelist - do - ocf_log info "${LH} Deleting master attribute for node ${i}" - ocf_run crm_attribute -N $i -l reboot --name 'rabbit-master' --delete - done - ocf_log info "${LH} pre-promote end." - fi - ;; - *) - ;; - esac - fi - if [ "${OCF_RESKEY_CRM_meta_notify_type}" = 'post' ] ; then # POST- anything notify section case "$OCF_RESKEY_CRM_meta_notify_operation" in @@ -2069,42 +2044,6 @@ action_notify() { # always returns OCF_SUCCESS ocf_log info "${LH} post-stop end." ;; - demote) - # if rabbitmq-server stops on any another node, we should remove it from cluster (as ordinary operation) - ocf_log info "${LH} post-demote begin." - # Report not running, if the list of nodes being demoted reported empty - if [ -z "${OCF_RESKEY_CRM_meta_notify_demote_uname}" ] ; then - ocf_log warn "${LH} there are no nodes being demoted reported on post-demote. The resource will be restarted." - ocf_log info "${LH} post-demote end." - return $OCF_ERR_GENERIC - fi - my_host "${OCF_RESKEY_CRM_meta_notify_demote_uname}" - rc=$? - if [ $rc -ne $OCF_SUCCESS ] ; then - # On ohter nodes processing the post-demote, make sure the demoted node will be forgotten - unjoin_nodes_from_cluster "${OCF_RESKEY_CRM_meta_notify_demote_uname}" - else - # Wait for synced state first - ocf_log info "${LH} waiting $((OCF_RESKEY_stop_time/2)) to sync" - wait_sync $((OCF_RESKEY_stop_time/2)) - # On the nodes being demoted, reset the master score - ocf_log info "${LH} resetting the master score." - master_score 0 - ocf_log info "${LH} Deleting start time attribute" - ocf_run crm_attribute -N $THIS_PCMK_NODE -l reboot --name 'rabbit-start-time' --delete - ocf_log info "${LH} Deleting master attribute" - ocf_run crm_attribute -N $THIS_PCMK_NODE -l reboot --name 'rabbit-master' --delete - ocf_log info "${LH} master was demoted. stopping RabbitMQ app." - stop_rmq_server_app - rc2=$? 
- if [ $rc2 -ne $OCF_SUCCESS ] ; then - ocf_log err "${LH} RMQ-server app can't be stopped on post-demote. Master resource is failed" - ocf_log info "${LH} post-demote end." - exit $OCF_FAILED_MASTER - fi - fi - ocf_log info "${LH} post-demote end." - ;; *) ;; esac fi @@ -2211,68 +2150,11 @@ action_promote() { action_demote() { - local rc=$OCF_ERR_GENERIC local LH="${LL} demote:" - - if [ "${OCF_RESKEY_debug}" = 'true' ] ; then - d=`date '+%Y%m%d %H:%M:%S'` - echo $d >> /tmp/rmq-demote.log - env >> /tmp/rmq-demote.log - echo "$d [demote] start='${OCF_RESKEY_CRM_meta_notify_start_uname}' stop='${OCF_RESKEY_CRM_meta_notify_stop_uname}' active='${OCF_RESKEY_CRM_meta_notify_active_uname}' inactive='${OCF_RESKEY_CRM_meta_notify_inactive_uname}'" >> /tmp/rmq-ocf.log - - fi - ocf_log info "${LH} action begin." - - get_monitor - rc=$? - case "$rc" in - "$OCF_RUNNING_MASTER") - # Running as master. Normal, expected behavior. - ocf_log warn "${LH} Resource is currently running as Master" - ocf_log info "${LH} Deleting master attribute" - ocf_run crm_attribute -N $THIS_PCMK_NODE -l reboot --name 'rabbit-master' --delete - ocf_log info "${LH} Deleting start timestamp" - ocf_run crm_attribute -N $THIS_PCMK_NODE -l reboot --name 'rabbit-start-time' --delete - - # Wait for synced state first - ocf_log info "${LH} waiting $((OCF_RESKEY_stop_time/2)) to sync" - wait_sync $((OCF_RESKEY_stop_time/2)) - - stop_rmq_server_app - rc=$? - ;; - "$OCF_SUCCESS") - # Alread running as slave. Nothing to do. - ocf_log warn "${LH} Resource is currently running as Slave" - rc=$OCF_SUCCESS - ;; - "$OCF_FAILED_MASTER") - # Master failed and being demoted. - ocf_log err "${LH} Demoting of a failed Master." - ocf_log info "${LH} action end." - exit $OCF_FAILED_MASTER - ;; - "$OCF_NOT_RUNNING") - ocf_log warn "${LH} Try to demote currently not running resource. Nothing to do." - rc=$OCF_SUCCESS - ;; - "$OCF_ERR_GENERIC") - ocf_log err "${LH} Error while demote. Stopping resource." 
- action_stop - rc=$? - ;; - *) - # Failed resource. Let the cluster manager recover. - ocf_log err "${LH} Unexpected error, cannot demote" - ocf_log info "${LH} action end." - exit $rc - ;; - esac - - # transform master RMQ-server to slave + ocf_run crm_attribute -N $THIS_PCMK_NODE -l reboot --name 'rabbit-master' --delete ocf_log info "${LH} action end." - return $rc + return $OCF_SUCCESS } ####################################################################### |
