Merge branch 'master' into rabbitmq-server-500-squashed

author: Michael Klishin <mklishin@pivotal.io> 2016-08-23 13:29:59 +0300
committer: Michael Klishin <mklishin@pivotal.io> 2016-08-23 13:29:59 +0300
commit: 922a43c2b56695478b9ef04699eceab5e170f649 (patch)
tree: 90f6d2dc14a5edddd4c78181d9849e048036d332
parent: 754feab6936ff1954964bf04423a2758899b91dc (diff)
parent: c72f7b4b91e05e4de7024e7c649feef8ee019338 (diff)
download: rabbitmq-server-git-922a43c2b56695478b9ef04699eceab5e170f649.tar.gz
6 files changed, 203 insertions, 242 deletions
diff --git a/scripts/rabbitmq-server-ha.ocf b/scripts/rabbitmq-server-ha.ocf
index 9e07dc8490..0dd27c72c4 100755
--- a/scripts/rabbitmq-server-ha.ocf
+++ b/scripts/rabbitmq-server-ha.ocf
@@ -319,6 +319,11 @@ $EXTENDED_OCF_PARAMS
 END
 }
 
+
+MIN_MASTER_SCORE=100
+BEST_MASTER_SCORE=1000
+
+
 #######################################################################
 # Functions invoked by resource manager actions
 
@@ -571,17 +576,21 @@ my_host() {
     return $rc
 }
 
-srv_uptime() {
-    local stime
-    stime=$( crm_attribute -N $THIS_PCMK_NODE -l reboot --name 'rabbit-start-time' --query 2>/dev/null | awk '{print $3}' | awk -F "=" '{print $2}' | sed -e '/(null)/d' )
-
-    if [ -z "${stime}" -o "${stime}" = "(null)" ] ; then
-        echo 0
-    else
-        echo $(( $(now) - ${stime} ))
+get_integer_node_attr() {
+    local value
+    value=$(crm_attribute -N $1 -l reboot --name "$2" --query 2>/dev/null | awk '{ split($3, vals, "="); if (vals[2] != "(null)") print vals[2] }')
+    if [ $? -ne 0 -o -z "$value" ] ; then
+        value=0
     fi
+    echo $value
+}
 
-    return $OCF_SUCCESS
+get_node_start_time() {
+    get_integer_node_attr $1 'rabbit-start-time'
+}
+
+get_node_master_score() {
+    get_integer_node_attr $1 'master-p_rabbitmq-server'
 }
 
 # Return either rabbit node name as FQDN or shortname, depends on the OCF_RESKEY_use_fqdn.
@@ -873,12 +882,21 @@ action_validate() {
     return $OCF_SUCCESS
 }
 
+update_rabbit_start_time_if_rc() {
+    local nowtime
+    local rc=$1
+    if [ $rc -eq 0 ]; then
+        nowtime="$(now)"
+        ocf_log info "${LH} Rabbit app started successfully. Updating start time attribute with ${nowtime}"
+        ocf_run crm_attribute -N $THIS_PCMK_NODE -l reboot --name 'rabbit-start-time' --update "${nowtime}"
+    fi
+}
+
 join_to_cluster() {
     local node="$1"
     local rmq_node
     local rc=$OCF_ERR_GENERIC
     local LH="${LL} join_to_cluster():"
-    local nowtime
 
     ocf_log info "${LH} start."
 
@@ -912,9 +930,7 @@ join_to_cluster() {
         action_stop
         return $OCF_ERR_GENERIC
     else
-        nowtime="$(now)"
-        ocf_log info "${LH} Rabbit app started successfully. Updating start time attribute with ${nowtime}"
-        ocf_run crm_attribute -N $THIS_PCMK_NODE -l reboot --name 'rabbit-start-time' --update "${nowtime}"
+        update_rabbit_start_time_if_rc 0
         ocf_log info "${LH} Joined to cluster succesfully."
     fi
 
@@ -1245,7 +1261,7 @@ start_rmq_server_app() {
     rc=$?
     if [ $rc -eq $OCF_SUCCESS ] ; then
         # rabbitmq-server started successfuly as master of cluster
-        master_score 1  # minimal positive master-score for this node.
+        master_score $MIN_MASTER_SCORE
         stop_rmq_server_app
         rc=$?
         if [ $rc -ne 0 ] ; then
@@ -1269,7 +1285,7 @@ start_rmq_server_app() {
                 if [ $rc -eq $OCF_SUCCESS ]; then
                     ocf_log info "${LH} RMQ-server app Mnesia cleaned successfully."
                     rc=$OCF_SUCCESS
-                    master_score 1
+                    master_score $MIN_MASTER_SCORE
                     break
                 else
                     ocf_log err "${LH} RMQ-server app can't be stopped during Mnesia cleaning. Beam will be killed."
@@ -1362,25 +1378,18 @@ check_timeouts() {
     local op_name=$3
 
     if [ $op_rc -ne 124 -a $op_rc -ne 137 ]; then
-        ocf_run attrd_updater -p --name $timeouts_attr_name --update 0
+        ocf_update_private_attr $timeouts_attr_name 0
         return 0
     fi
 
     local count
-    count=`attrd_updater --name $timeouts_attr_name --query 2>/dev/null`
-    if [ $? -ne 0 ]; then
-        # the attrd_updater exited with error. In that case most probably it printed garbage
-        # instead of the number we need. So defensively assume that it is zero.
-
-        count=0
-    fi
-    count=`echo "${count}" | awk '{print $3}' | awk -F "=" '{print $2}' | sed -e '/(null)/d'`
+    count=$(ocf_get_private_attr $timeouts_attr_name 0)
 
     count=$((count+1))
     # There is a slight chance that this piece of code will be executed twice simultaneously.
     # As a result, $timeouts_attr_name's value will be one less than it should be. But we don't need
     # precise calculation here.
-    ocf_run attrd_updater -p --name $timeouts_attr_name --update $count
+    ocf_update_private_attr $timeouts_attr_name $count
 
     if [ $count -lt $OCF_RESKEY_max_rabbitmqctl_timeouts ]; then
         ocf_log warn "${LH} 'rabbitmqctl $op_name' timed out $count of max. $OCF_RESKEY_max_rabbitmqctl_timeouts time(s) in a row. Doing nothing for now."
@@ -1407,11 +1416,8 @@ get_monitor() {
     local rabbit_running
     local name
     local node
-    local nodelist
-    local max
-    local our_uptime
-    local node_uptime
     local node_start_time
+    local nowtime
 
     ocf_log info "${LH} CHECK LEVEL IS: ${OCF_CHECK_LEVEL}"
     get_status
@@ -1442,46 +1448,27 @@ get_monitor() {
     rabbit_running=$?
     ocf_log info "${LH} checking if rabbit app is running"
 
-    if [ $rabbit_running -eq $OCF_SUCCESS ]
-    then
-        ocf_log info "${LH} rabbit app is running. checking if we are the part of healthy cluster"
-
-        if [ $rc -eq $OCF_RUNNING_MASTER ] ; then
-            # The master is always running inside of its cluster
+    if [ $rc -eq $OCF_RUNNING_MASTER ]; then
+        if [ $rabbit_running -eq $OCF_SUCCESS ]; then
             ocf_log info "${LH} rabbit app is running and is master of cluster"
-
         else
-            local master_name=$(get_master_name_but $THIS_PCMK_NODE)
-
-            if [ -z "$master_name" ]; then
-                ocf_log info "${LH} no master is elected currently. Skipping cluster health check."
-
-            elif is_clustered_with $master_name; then
-                ocf_log info "${LH} rabbit app is running and is member of healthy cluster"
-
-            else
-                # Rabbit is running but is not connected to master
-                # Failing to avoid split brain
-                ocf_log err "${LH} rabbit node is running out of the cluster"
-                stop_server_process
-                rc=$OCF_ERR_GENERIC
-            fi
+            ocf_log err "${LH} we are the master and rabbit app is not running. This is a failure"
+            exit $OCF_FAILED_MASTER
         fi
     else
-        if [ "$OCF_CHECK_LEVEL" -gt 20 ]; then
-            ocf_log info "${LH} rabbit app is not running. checking if there is a master"
-            # Do not refetch the master status as we know it already
-            if [ $rc -eq $OCF_RUNNING_MASTER ]; then
-              ocf_log err "${LH} we are the master and rabbit app is not running. this is a failure"
-              exit $OCF_FAILED_MASTER
-            fi
-
-            local master_name=$(get_master_name_but $THIS_PCMK_NODE)
-
-            if [ -n "$master_name" ]; then
-                ocf_log info "${LH} master exists and rabbit app is not running. Exiting to be restarted by pacemaker"
+        start_time=$((180 + $(ocf_get_private_attr 'rabbit-start-phase-1-time' 0)))
+        restart_order_time=$((60 + $(ocf_get_private_attr 'rabbit-ordered-to-restart' 0)))
+        nowtime=$(now)
+
+        # If we started more than 3 minutes ago, and
+        # we got order to restart less than 1 minute ago
+        if [ $nowtime -lt $restart_order_time ]; then
+            if [ $nowtime -gt $start_time ]; then
+                ocf_log err "${LH} failing because we have received an order to restart from the master"
                 stop_server_process
                 rc=$OCF_ERR_GENERIC
+            else
+                ocf_log warn "${LH} received an order to restart from the master, ignoring it because we have just started"
             fi
         fi
     fi
@@ -1491,45 +1478,37 @@ get_monitor() {
         ocf_log info "${LH} ensuring this slave does not get promoted."
         master_score 0
         return $OCF_ERR_GENERIC
-    elif [ $rc -ne $OCF_RUNNING_MASTER ] ; then
-        ocf_log info "${LH} preparing to update master score for node"
-        our_uptime=$(srv_uptime)
-        nodelist=$(get_alive_pacemaker_nodes_but $THIS_PCMK_NODE)
-        max=1
-        for node in $nodelist
+    fi
+
+    # Recounting our master score
+    ocf_log info "${LH} preparing to update master score for node"
+    local our_start_time
+    local new_score
+    local node_start_time
+    local node_score
+
+    our_start_time=$(get_node_start_time $THIS_PCMK_NODE)
+
+    if [ $our_start_time -eq 0 ]; then
+        new_score=$MIN_MASTER_SCORE
+    else
+        new_score=$BEST_MASTER_SCORE
+        for node in $(get_alive_pacemaker_nodes_but $THIS_PCMK_NODE)
         do
-            node_start_time=`crm_attribute -N $node -l reboot --name 'rabbit-start-time' --query 2>/dev/null | awk '{print $3}' | awk -F "=" '{print $2}' | sed -e '/(null)/d'`
-            if [ -z "${node_start_time}" -o "${node_start_time}" = "(null)" ] ; then
-                node_uptime=0
-            else
-                node_uptime=$(( $(now) - ${node_start_time}  ))
-            fi
-            ocf_log info "${LH} comparing our uptime (${our_uptime}) with $node (${node_uptime})"
-            if [ ${our_uptime} -lt ${node_uptime} ]
-            then
-                max=1
-                break
-            else
-                # When uptime is equal, accept the existing master - if any - as the oldest node
-                is_master $node
-                status_master=$?
-                if [ $status_master -eq 0 ] ; then
-                    max=1
-                    ocf_log info "${LH} Found the oldest master node $node with uptime (${node_uptime})"
-                    break
-                else
-                    max=0
-                fi
+            node_start_time=$(get_node_start_time $node)
+            node_score=$(get_node_master_score $node)
+
+            ocf_log info "${LH} comparing us (start time: $our_start_time, score: $new_score) with $node (start time: $node_start_time, score: $node_score)"
+            if [ $node_start_time -ne 0 -a $node_score -ne 0 -a $node_start_time -lt $our_start_time ]; then
+                new_score=$((node_score - 10 < new_score ? node_score - 10 : new_score ))
             fi
         done
+    fi
 
-
-        if [ $max -eq 0 ]
-        then
-            ocf_log info "${LH} we are the oldest node"
-            master_score 1000
-        fi
+    if [ "$new_score" -ne "$(get_node_master_score $THIS_PCMK_NODE)" ]; then
+        master_score $new_score
     fi
+    ocf_log info "${LH} our start time is $our_start_time and score is $new_score"
 
     # Skip all other checks if rabbit app is not running
     if [ $rabbit_running -ne $OCF_SUCCESS ]; then
@@ -1616,13 +1595,13 @@ get_monitor() {
 
         elif [ -n "${queues}" ]; then
             local q_c
-            q_c=`printf "%b\n" "${queues}" | wc -l`
+            q_c=`printf %b "${queues}\n" | wc -l`
             local mem
-            mem=`printf "%b\n" "${queues}" | awk -v sum=0 '{sum+=$1} END {print (sum/1048576)}'`
+            mem=`printf %b "${queues}\n" | awk -v sum=0 '{sum+=$1} END {print (sum/1048576)}'`
             local mes
-            mes=`printf "%b\n" "${queues}" | awk -v sum=0 '{sum+=$2} END {print sum}'`
+            mes=`printf %b "${queues}\n" | awk -v sum=0 '{sum+=$2} END {print sum}'`
             local c_u
-            c_u=`printf "%b\n" "${queues}" | awk -v sum=0 -v cnt=${q_c} '{sum+=$3} END {print (sum+1)/(cnt+1)}'`
+            c_u=`printf %b "${queues}\n" | awk -v sum=0 -v cnt=${q_c} '{sum+=$3} END {print (sum+1)/(cnt+1)}'`
             local status
             status=`echo $(su_rabbit_cmd "${OCF_RESKEY_ctl} -q status")`
             ocf_log info "${LH} RabbitMQ is running ${q_c} queues consuming ${mem}m of ${TOTALVMEM}m total, with ${mes} queued messages, average consumer utilization ${c_u}"
@@ -1630,14 +1609,41 @@ get_monitor() {
         fi
     fi
 
+    # If we are the master and healthy, check that we see other cluster members
+    # Order a member to restart if we don't see it
+    if [ $rc -eq $OCF_RUNNING_MASTER ] ; then
+        for node in $(get_all_pacemaker_nodes); do
+            if ! is_clustered_with $node; then
+                nowtime=$(now)
+
+                ocf_log warn "${LH} node $node is not connected with us, ordering it to restart."
+                ocf_update_private_attr 'rabbit-ordered-to-restart' "$nowtime" "$node"
+            fi
+        done
+    fi
+
     ocf_log info "${LH} get_monitor function ready to return ${rc}"
     return $rc
 }
 
+ocf_get_private_attr() {
+    local attr_name="${1:?}"
+    local attr_default_value="${2:?}"
+    local nodename="${3:-$THIS_PCMK_NODE}"
+    local count
+    count=$(attrd_updater -p --name "$attr_name" --node "$nodename" --query)
+    if [ $? -ne 0 ]; then
+        echo $attr_default_value
+    else
+        echo "$count" | awk -vdef_val="$attr_default_value" '{ gsub(/"/, "", $3); split($3, vals, "="); if (vals[2] != "") print vals[2]; else print def_val }'
+    fi
+}
+
 ocf_update_private_attr() {
     local attr_name="${1:?}"
     local attr_value="${2:?}"
-    ocf_run attrd_updater -p --name "$attr_name" --update "$attr_value"
+    local nodename="${3:-$THIS_PCMK_NODE}"
+    ocf_run attrd_updater -p --name "$attr_name" --node "$nodename" --update "$attr_value"
 }
 
 rabbitmqctl_with_timeout_check() {
@@ -1687,6 +1693,7 @@ action_monitor() {
 action_start() {
     local rc=$OCF_ERR_GENERIC
     local LH="${LL} start:"
+    local nowtime
 
     if [ "${OCF_RESKEY_debug}" = 'true' ] ; then
         d=`date '+%Y%m%d %H:%M:%S'`
@@ -1710,6 +1717,9 @@ action_start() {
         ocf_update_private_attr $attr_name_to_reset 0
     done
 
+    nowtime=$(now)
+    ocf_log info "${LH} Setting phase 1 one start time to $nowtime"
+    ocf_update_private_attr 'rabbit-start-phase-1-time' "$nowtime"
     ocf_log info "${LH} Deleting start time attribute"
     ocf_run crm_attribute -N $THIS_PCMK_NODE -l reboot --name 'rabbit-start-time' --delete
     ocf_log info "${LH} Deleting master attribute"
@@ -1915,7 +1925,6 @@ action_notify() {
     local rc2=$OCF_ERR_GENERIC
     local LH="${LL} notify:"
     local nodelist
-    local nowtime
 
     if [ "${OCF_RESKEY_debug}" = 'true' ] ; then
         d=`date '+%Y%m%d %H:%M:%S'`
@@ -1924,28 +1933,6 @@ action_notify() {
         echo "$d  [notify]  ${OCF_RESKEY_CRM_meta_notify_type}-${OCF_RESKEY_CRM_meta_notify_operation} promote='${OCF_RESKEY_CRM_meta_notify_promote_uname}' demote='${OCF_RESKEY_CRM_meta_notify_demote_uname}' master='${OCF_RESKEY_CRM_meta_notify_master_uname}' slave='${OCF_RESKEY_CRM_meta_notify_slave_uname}' start='${OCF_RESKEY_CRM_meta_notify_start_uname}' stop='${OCF_RESKEY_CRM_meta_notify_stop_uname}' active='${OCF_RESKEY_CRM_meta_notify_active_uname}' inactive='${OCF_RESKEY_CRM_meta_notify_inactive_uname}'" >> /tmp/rmq-ocf.log
     fi
 
-    if [ "${OCF_RESKEY_CRM_meta_notify_type}" = 'pre' ] ; then
-        # PRE- anything notify section
-        case "$OCF_RESKEY_CRM_meta_notify_operation" in
-            promote)
-                ocf_log info "${LH} pre-promote begin."
-                my_host "$OCF_RESKEY_CRM_meta_notify_promote_uname"
-                rc=$?
-                if [ $rc -eq $OCF_SUCCESS ] ; then
-                    nodelist=$(get_all_pacemaker_nodes)
-                    for i in $nodelist
-                    do
-                       ocf_log info "${LH} Deleting master attribute for node ${i}"
-                       ocf_run crm_attribute -N $i -l reboot --name 'rabbit-master' --delete
-                    done
-                    ocf_log info "${LH} pre-promote end."
-                fi
-                ;;
-            *)
-                ;;
-        esac
-    fi
-
     if [ "${OCF_RESKEY_CRM_meta_notify_type}" = 'post' ] ; then
         # POST- anything notify section
         case "$OCF_RESKEY_CRM_meta_notify_operation" in
@@ -1963,7 +1950,15 @@ action_notify() {
                     ocf_log info "${LH} ignoring post-promote of self"
 
                 elif is_clustered_with "${OCF_RESKEY_CRM_meta_notify_promote_uname}"; then
-                    ocf_log info "${LH} we are already clustered with master - ${OCF_RESKEY_CRM_meta_notify_promote_uname}. Nothing to do."
+                    if get_status rabbit; then
+                        ocf_log info "${LH} we are already clustered with master - ${OCF_RESKEY_CRM_meta_notify_promote_uname}. Nothing to do."
+                    else
+                        ocf_log info "${LH} we are already clustered with master - ${OCF_RESKEY_CRM_meta_notify_promote_uname}. We only need to start the app."
+
+                        try_to_start_rmq_app
+                        rc2=$?
+                        update_rabbit_start_time_if_rc $rc2
+                    fi
 
                 else
                     # Note, this should fail when the mnesia is inconsistent.
@@ -2012,14 +2007,10 @@ action_notify() {
                       rc2=$?
                     else
                       ocf_log warn "${LH} We are already clustered with node ${OCF_RESKEY_CRM_meta_notify_master_uname}"
-                      if try_to_start_rmq_app; then
-                          rc2=$OCF_SUCCESS
-                          nowtime="$(now)"
-                          ocf_log info "${LH} Updating start time attribute with ${nowtime}"
-                          ocf_run crm_attribute -N $THIS_PCMK_NODE -l reboot --name 'rabbit-start-time' --update "${nowtime}"
-                      else
-                          rc2=$OCF_ERR_GENERIC
-                      fi
+
+                      try_to_start_rmq_app
+                      rc2=$?
+                      update_rabbit_start_time_if_rc $rc2
                     fi
                     ocf_log info "${LH} post-start end."
                     if [ -s "${OCF_RESKEY_definitions_dump_file}" ] ; then
@@ -2064,42 +2055,6 @@ action_notify() {
                 # always returns OCF_SUCCESS
                 ocf_log info "${LH} post-stop end."
                 ;;
-            demote)
-                # if rabbitmq-server stops on any another node, we should remove it from cluster (as ordinary operation)
-                ocf_log info "${LH} post-demote begin."
-                # Report not running, if the list of nodes being demoted reported empty
-                if [ -z "${OCF_RESKEY_CRM_meta_notify_demote_uname}" ] ; then
-                  ocf_log warn "${LH} there are no nodes being demoted reported on post-demote. The resource will be restarted."
-                  ocf_log info "${LH} post-demote end."
-                  return $OCF_ERR_GENERIC
-                fi
-                my_host "${OCF_RESKEY_CRM_meta_notify_demote_uname}"
-                rc=$?
-                if [ $rc -ne $OCF_SUCCESS ] ; then
-                    # On ohter nodes processing the post-demote, make sure the demoted node will be forgotten
-                    unjoin_nodes_from_cluster "${OCF_RESKEY_CRM_meta_notify_demote_uname}"
-                else
-                    # Wait for synced state first
-                    ocf_log info "${LH} waiting $((OCF_RESKEY_stop_time/2)) to sync"
-                    wait_sync $((OCF_RESKEY_stop_time/2))
-                    # On the nodes being demoted, reset the master score
-                    ocf_log info "${LH} resetting the master score."
-                    master_score 0
-                    ocf_log info "${LH} Deleting start time attribute"
-                    ocf_run crm_attribute -N $THIS_PCMK_NODE -l reboot --name 'rabbit-start-time' --delete
-                    ocf_log info "${LH} Deleting master attribute"
-                    ocf_run crm_attribute -N $THIS_PCMK_NODE -l reboot --name 'rabbit-master' --delete
-                    ocf_log info "${LH} master was demoted. stopping RabbitMQ app."
-                    stop_rmq_server_app
-                    rc2=$?
-                    if [ $rc2 -ne $OCF_SUCCESS ] ; then
-                        ocf_log err "${LH} RMQ-server app can't be stopped on post-demote. Master resource is failed"
-                        ocf_log info "${LH} post-demote end."
-                        exit $OCF_FAILED_MASTER
-                    fi
-                fi
-                ocf_log info "${LH} post-demote end."
-                ;;
             *)  ;;
         esac
     fi
@@ -2111,7 +2066,6 @@ action_notify() {
 action_promote() {
     local rc=$OCF_ERR_GENERIC
     local LH="${LL} promote:"
-    local nowtime
 
     if [ "${OCF_RESKEY_debug}" = 'true' ] ; then
         d=$(date '+%Y%m%d %H:%M:%S')
@@ -2149,10 +2103,8 @@ action_promote() {
 
                     [ -f "${OCF_RESKEY_policy_file}" ] && . "${OCF_RESKEY_policy_file}"
 
-                    # create timestamp file
-                    nowtime="$(now)"
-                    ocf_log info "${LH} Updating start timestamp with ${nowtime}"
-                    ocf_run crm_attribute -N $THIS_PCMK_NODE -l reboot --name 'rabbit-start-time' --update "${nowtime}"
+                    update_rabbit_start_time_if_rc $rc
+
                     ocf_log info "${LH} Checking master status"
                     get_monitor
                     rc=$?
@@ -2206,68 +2158,11 @@ action_promote() {
 
 
 action_demote() {
-    local rc=$OCF_ERR_GENERIC
     local LH="${LL} demote:"
-
-    if [ "${OCF_RESKEY_debug}" = 'true' ] ; then
-        d=`date '+%Y%m%d %H:%M:%S'`
-        echo $d >> /tmp/rmq-demote.log
-        env >> /tmp/rmq-demote.log
-        echo "$d  [demote]  start='${OCF_RESKEY_CRM_meta_notify_start_uname}' stop='${OCF_RESKEY_CRM_meta_notify_stop_uname}' active='${OCF_RESKEY_CRM_meta_notify_active_uname}' inactive='${OCF_RESKEY_CRM_meta_notify_inactive_uname}'" >> /tmp/rmq-ocf.log
-
-    fi
-
     ocf_log info "${LH} action begin."
-
-    get_monitor
-    rc=$?
-    case "$rc" in
-        "$OCF_RUNNING_MASTER")
-            # Running as master. Normal, expected behavior.
-            ocf_log warn "${LH} Resource is currently running as Master"
-            ocf_log info "${LH} Deleting master attribute"
-            ocf_run crm_attribute -N $THIS_PCMK_NODE -l reboot --name 'rabbit-master' --delete
-            ocf_log info "${LH} Deleting start timestamp"
-            ocf_run crm_attribute -N $THIS_PCMK_NODE -l reboot --name 'rabbit-start-time' --delete
-
-            # Wait for synced state first
-            ocf_log info "${LH} waiting $((OCF_RESKEY_stop_time/2)) to sync"
-            wait_sync $((OCF_RESKEY_stop_time/2))
-
-            stop_rmq_server_app
-            rc=$?
-            ;;
-        "$OCF_SUCCESS")
-            # Alread running as slave. Nothing to do.
-            ocf_log warn "${LH} Resource is currently running as Slave"
-            rc=$OCF_SUCCESS
-            ;;
-        "$OCF_FAILED_MASTER")
-            # Master failed and being demoted.
-            ocf_log err "${LH} Demoting of a failed Master."
-            ocf_log info "${LH} action end."
-            exit $OCF_FAILED_MASTER
-            ;;
-        "$OCF_NOT_RUNNING")
-            ocf_log warn "${LH} Try to demote currently not running resource. Nothing to do."
-            rc=$OCF_SUCCESS
-            ;;
-        "$OCF_ERR_GENERIC")
-            ocf_log err "${LH} Error while demote. Stopping resource."
-            action_stop
-            rc=$?
-            ;;
-        *)
-            # Failed resource. Let the cluster manager recover.
-            ocf_log err "${LH} Unexpected error, cannot demote"
-            ocf_log info "${LH} action end."
-            exit $rc
-            ;;
-    esac
-
-    # transform master RMQ-server to slave
+    ocf_run crm_attribute -N $THIS_PCMK_NODE -l reboot --name 'rabbit-master' --delete
     ocf_log info "${LH} action end."
-    return $rc
+    return $OCF_SUCCESS
 }
 #######################################################################
 
diff --git a/src/gm.erl b/src/gm.erl
index 985b5bfa6d..0b5c1c44c4 100644
--- a/src/gm.erl
+++ b/src/gm.erl
@@ -760,6 +760,9 @@ handle_info({'DOWN', MRef, process, _Pid, Reason},
     end;
 handle_info(_, State) ->
     %% Discard any unexpected messages, such as late replies from neighbour_call/2
+    %% TODO: For #gm_group{} related info messages, it could be worthwhile to
+    %% change_view/2, as this might reflect an alteration in the gm group, meaning
+    %% we now need to update our state. see rabbitmq-server#914.
     noreply(State).
 
 terminate(Reason, #state { module = Module, callback_args = Args }) ->
@@ -1596,7 +1599,9 @@ check_membership(Self, #gm_group{members = M} = Group) ->
             Group;
         false ->
             throw(lost_membership)
-    end.
+    end;
+check_membership(_Self, {error, not_found}) ->
+    throw(lost_membership).
 
 check_membership(GroupName) ->
     case dirty_read_group(GroupName) of
diff --git a/src/rabbit_mirror_queue_coordinator.erl b/src/rabbit_mirror_queue_coordinator.erl
index 221f11f18a..017d1d1fa2 100644
--- a/src/rabbit_mirror_queue_coordinator.erl
+++ b/src/rabbit_mirror_queue_coordinator.erl
@@ -355,6 +355,15 @@ handle_cast({gm_deaths, DeadGMPids},
                                                    DeadPids),
             rabbit_mirror_queue_misc:add_mirrors(QueueName, ExtraNodes, async),
             noreply(State);
+        {ok, _MPid0, DeadPids, _ExtraNodes} ->
+            %% see rabbitmq-server#914;
+            %% Different slave is now master, stop current coordinator normally.
+            %% Initiating queue is now slave and the least we could do is report
+            %% deaths which we 'think' we saw.
+            %% NOTE: Reported deaths here, could be inconsistant.
+            rabbit_mirror_queue_misc:report_deaths(MPid, false, QueueName,
+                                                   DeadPids),
+            {stop, normal, State};
         {error, not_found} ->
             {stop, normal, State}
     end;
diff --git a/src/rabbit_mirror_queue_misc.erl b/src/rabbit_mirror_queue_misc.erl
index 35ee87937e..59522da4a9 100644
--- a/src/rabbit_mirror_queue_misc.erl
+++ b/src/rabbit_mirror_queue_misc.erl
@@ -78,7 +78,7 @@ remove_from_queue(QueueName, Self, DeadGMPids) ->
     rabbit_misc:execute_mnesia_transaction(
       fun () ->
               %% Someone else could have deleted the queue before we
-              %% get here.
+              %% get here. Or, gm group could've altered. see rabbitmq-server#914
               case mnesia:read({rabbit_queue, QueueName}) of
                   [] -> {error, not_found};
                   [Q = #amqqueue { pid        = QPid,
@@ -92,7 +92,16 @@ remove_from_queue(QueueName, Self, DeadGMPids) ->
                       AlivePids = [Pid || {_GM, Pid} <- AliveGM],
                       Alive     = [Pid || Pid <- [QPid | SPids],
                                           lists:member(Pid, AlivePids)],
-                      {QPid1, SPids1} = promote_slave(Alive),
+                      {QPid1, SPids1} = case Alive of
+                                            [] ->
+                                                %% GM altered, & if all pids are
+                                                %% perceived as dead, rather do
+                                                %% do nothing here, & trust the
+                                                %% promoted slave to have updated
+                                                %% mnesia during the alteration.
+                                                {QPid, SPids};
+                                            _  -> promote_slave(Alive)
+                                        end,
                       Extra =
                           case {{QPid, SPids}, {QPid1, SPids1}} of
                               {Same, Same} ->
@@ -100,7 +109,8 @@ remove_from_queue(QueueName, Self, DeadGMPids) ->
                               _ when QPid =:= QPid1 orelse QPid1 =:= Self ->
                                   %% Either master hasn't changed, so
                                   %% we're ok to update mnesia; or we have
-                                  %% become the master.
+                                  %% become the master. If gm altered,
+                                  %% we have no choice but to proceed.
                                   Q1 = Q#amqqueue{pid        = QPid1,
                                                   slave_pids = SPids1,
                                                   gm_pids    = AliveGM},
diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl
index ec27eaf68a..12b13c36fd 100644
--- a/src/rabbit_mirror_queue_slave.erl
+++ b/src/rabbit_mirror_queue_slave.erl
@@ -247,9 +247,15 @@ handle_call({gm_deaths, DeadGMPids}, From,
                 _ ->
                     %% master has changed to not us
                     gen_server2:reply(From, ok),
-                    %% assertion, we don't need to add_mirrors/2 in this
-                    %% branch, see last clause in remove_from_queue/2
-                    [] = ExtraNodes,
+                    %% see rabbitmq-server#914;
+                    %% It's not always guaranteed that we won't have ExtraNodes.
+                    %% If gm alters, master can change to not us with extra nodes,
+                    %% in which case we attempt to add mirrors on those nodes.
+                    case ExtraNodes of
+                        [] -> void;
+                        _  -> rabbit_mirror_queue_misc:add_mirrors(
+                                QName, ExtraNodes, async)
+                    end,
                     %% Since GM is by nature lazy we need to make sure
                     %% there is some traffic when a master dies, to
                     %% make sure all slaves get informed of the
diff --git a/test/gm_SUITE.erl b/test/gm_SUITE.erl
index e4c68a257a..8b07c9efad 100644
--- a/test/gm_SUITE.erl
+++ b/test/gm_SUITE.erl
@@ -39,7 +39,8 @@ all() ->
       confirmed_broadcast,
       member_death,
       receive_in_order,
-      unexpected_msg
+      unexpected_msg,
+      down_in_members_change
     ].
 
 init_per_suite(Config) ->
@@ -123,6 +124,41 @@ unexpected_msg(_Config) ->
 		       passed
 	       end).
 
+down_in_members_change(_Config) ->
+    %% Setup
+    ok = gm:create_tables(),
+    {ok, Pid} = gm:start_link(?MODULE, ?MODULE, self(),
+                              fun rabbit_misc:execute_mnesia_transaction/1),
+    passed = receive_joined(Pid, [Pid], timeout_joining_gm_group_1),
+    {ok, Pid2} = gm:start_link(?MODULE, ?MODULE, self(),
+                               fun rabbit_misc:execute_mnesia_transaction/1),
+    passed = receive_joined(Pid2, [Pid, Pid2], timeout_joining_gm_group_2),
+    passed = receive_birth(Pid, Pid2, timeout_waiting_for_birth_2),
+
+    %% Test. Simulate that the gm group is deleted (forget_group) while
+    %% processing the 'DOWN' message from the neighbour
+    process_flag(trap_exit, true),
+    ok = meck:new(mnesia, [passthrough]),
+    ok = meck:expect(mnesia, read, fun({gm_group, ?MODULE}) ->
+					   [];
+				      (Key) ->
+					   meck:passthrough([Key])
+				   end),
+    gm:leave(Pid2),
+    Passed = receive
+		 {'EXIT', Pid, normal} ->
+		     passed;
+		 {'EXIT', Pid, _} ->
+		     crashed
+	     after 15000 ->
+		     timeout
+	     end,
+    %% Cleanup
+    meck:unload(mnesia),
+    process_flag(trap_exit, false),
+    passed = Passed.
+
+
 do_broadcast(Fun) ->
     with_two_members(broadcast_fun(Fun)).
author	Michael Klishin <mklishin@pivotal.io>	2016-08-23 13:29:59 +0300
committer	Michael Klishin <mklishin@pivotal.io>	2016-08-23 13:29:59 +0300
commit	922a43c2b56695478b9ef04699eceab5e170f649 (patch)
tree	90f6d2dc14a5edddd4c78181d9849e048036d332
parent	754feab6936ff1954964bf04423a2758899b91dc (diff)
parent	c72f7b4b91e05e4de7024e7c649feef8ee019338 (diff)
download	rabbitmq-server-git-922a43c2b56695478b9ef04699eceab5e170f649.tar.gz