summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichael Klishin <mklishin@pivotal.io>2016-08-23 13:29:59 +0300
committerMichael Klishin <mklishin@pivotal.io>2016-08-23 13:29:59 +0300
commit922a43c2b56695478b9ef04699eceab5e170f649 (patch)
tree90f6d2dc14a5edddd4c78181d9849e048036d332
parent754feab6936ff1954964bf04423a2758899b91dc (diff)
parentc72f7b4b91e05e4de7024e7c649feef8ee019338 (diff)
downloadrabbitmq-server-git-922a43c2b56695478b9ef04699eceab5e170f649.tar.gz
Merge branch 'master' into rabbitmq-server-500-squashed
-rwxr-xr-xscripts/rabbitmq-server-ha.ocf363
-rw-r--r--src/gm.erl7
-rw-r--r--src/rabbit_mirror_queue_coordinator.erl9
-rw-r--r--src/rabbit_mirror_queue_misc.erl16
-rw-r--r--src/rabbit_mirror_queue_slave.erl12
-rw-r--r--test/gm_SUITE.erl38
6 files changed, 203 insertions, 242 deletions
diff --git a/scripts/rabbitmq-server-ha.ocf b/scripts/rabbitmq-server-ha.ocf
index 9e07dc8490..0dd27c72c4 100755
--- a/scripts/rabbitmq-server-ha.ocf
+++ b/scripts/rabbitmq-server-ha.ocf
@@ -319,6 +319,11 @@ $EXTENDED_OCF_PARAMS
END
}
+
+MIN_MASTER_SCORE=100
+BEST_MASTER_SCORE=1000
+
+
#######################################################################
# Functions invoked by resource manager actions
@@ -571,17 +576,21 @@ my_host() {
return $rc
}
-srv_uptime() {
- local stime
- stime=$( crm_attribute -N $THIS_PCMK_NODE -l reboot --name 'rabbit-start-time' --query 2>/dev/null | awk '{print $3}' | awk -F "=" '{print $2}' | sed -e '/(null)/d' )
-
- if [ -z "${stime}" -o "${stime}" = "(null)" ] ; then
- echo 0
- else
- echo $(( $(now) - ${stime} ))
+get_integer_node_attr() {
+ local value
+ value=$(crm_attribute -N $1 -l reboot --name "$2" --query 2>/dev/null | awk '{ split($3, vals, "="); if (vals[2] != "(null)") print vals[2] }')
+ if [ $? -ne 0 -o -z "$value" ] ; then
+ value=0
fi
+ echo $value
+}
- return $OCF_SUCCESS
+get_node_start_time() {
+ get_integer_node_attr $1 'rabbit-start-time'
+}
+
+get_node_master_score() {
+ get_integer_node_attr $1 'master-p_rabbitmq-server'
}
# Return either rabbit node name as FQDN or shortname, depends on the OCF_RESKEY_use_fqdn.
@@ -873,12 +882,21 @@ action_validate() {
return $OCF_SUCCESS
}
+update_rabbit_start_time_if_rc() {
+ local nowtime
+ local rc=$1
+ if [ $rc -eq 0 ]; then
+ nowtime="$(now)"
+ ocf_log info "${LH} Rabbit app started successfully. Updating start time attribute with ${nowtime}"
+ ocf_run crm_attribute -N $THIS_PCMK_NODE -l reboot --name 'rabbit-start-time' --update "${nowtime}"
+ fi
+}
+
join_to_cluster() {
local node="$1"
local rmq_node
local rc=$OCF_ERR_GENERIC
local LH="${LL} join_to_cluster():"
- local nowtime
ocf_log info "${LH} start."
@@ -912,9 +930,7 @@ join_to_cluster() {
action_stop
return $OCF_ERR_GENERIC
else
- nowtime="$(now)"
- ocf_log info "${LH} Rabbit app started successfully. Updating start time attribute with ${nowtime}"
- ocf_run crm_attribute -N $THIS_PCMK_NODE -l reboot --name 'rabbit-start-time' --update "${nowtime}"
+ update_rabbit_start_time_if_rc 0
ocf_log info "${LH} Joined to cluster succesfully."
fi
@@ -1245,7 +1261,7 @@ start_rmq_server_app() {
rc=$?
if [ $rc -eq $OCF_SUCCESS ] ; then
# rabbitmq-server started successfuly as master of cluster
- master_score 1 # minimal positive master-score for this node.
+ master_score $MIN_MASTER_SCORE
stop_rmq_server_app
rc=$?
if [ $rc -ne 0 ] ; then
@@ -1269,7 +1285,7 @@ start_rmq_server_app() {
if [ $rc -eq $OCF_SUCCESS ]; then
ocf_log info "${LH} RMQ-server app Mnesia cleaned successfully."
rc=$OCF_SUCCESS
- master_score 1
+ master_score $MIN_MASTER_SCORE
break
else
ocf_log err "${LH} RMQ-server app can't be stopped during Mnesia cleaning. Beam will be killed."
@@ -1362,25 +1378,18 @@ check_timeouts() {
local op_name=$3
if [ $op_rc -ne 124 -a $op_rc -ne 137 ]; then
- ocf_run attrd_updater -p --name $timeouts_attr_name --update 0
+ ocf_update_private_attr $timeouts_attr_name 0
return 0
fi
local count
- count=`attrd_updater --name $timeouts_attr_name --query 2>/dev/null`
- if [ $? -ne 0 ]; then
- # the attrd_updater exited with error. In that case most probably it printed garbage
- # instead of the number we need. So defensively assume that it is zero.
-
- count=0
- fi
- count=`echo "${count}" | awk '{print $3}' | awk -F "=" '{print $2}' | sed -e '/(null)/d'`
+ count=$(ocf_get_private_attr $timeouts_attr_name 0)
count=$((count+1))
# There is a slight chance that this piece of code will be executed twice simultaneously.
# As a result, $timeouts_attr_name's value will be one less than it should be. But we don't need
# precise calculation here.
- ocf_run attrd_updater -p --name $timeouts_attr_name --update $count
+ ocf_update_private_attr $timeouts_attr_name $count
if [ $count -lt $OCF_RESKEY_max_rabbitmqctl_timeouts ]; then
ocf_log warn "${LH} 'rabbitmqctl $op_name' timed out $count of max. $OCF_RESKEY_max_rabbitmqctl_timeouts time(s) in a row. Doing nothing for now."
@@ -1407,11 +1416,8 @@ get_monitor() {
local rabbit_running
local name
local node
- local nodelist
- local max
- local our_uptime
- local node_uptime
local node_start_time
+ local nowtime
ocf_log info "${LH} CHECK LEVEL IS: ${OCF_CHECK_LEVEL}"
get_status
@@ -1442,46 +1448,27 @@ get_monitor() {
rabbit_running=$?
ocf_log info "${LH} checking if rabbit app is running"
- if [ $rabbit_running -eq $OCF_SUCCESS ]
- then
- ocf_log info "${LH} rabbit app is running. checking if we are the part of healthy cluster"
-
- if [ $rc -eq $OCF_RUNNING_MASTER ] ; then
- # The master is always running inside of its cluster
+ if [ $rc -eq $OCF_RUNNING_MASTER ]; then
+ if [ $rabbit_running -eq $OCF_SUCCESS ]; then
ocf_log info "${LH} rabbit app is running and is master of cluster"
-
else
- local master_name=$(get_master_name_but $THIS_PCMK_NODE)
-
- if [ -z "$master_name" ]; then
- ocf_log info "${LH} no master is elected currently. Skipping cluster health check."
-
- elif is_clustered_with $master_name; then
- ocf_log info "${LH} rabbit app is running and is member of healthy cluster"
-
- else
- # Rabbit is running but is not connected to master
- # Failing to avoid split brain
- ocf_log err "${LH} rabbit node is running out of the cluster"
- stop_server_process
- rc=$OCF_ERR_GENERIC
- fi
+ ocf_log err "${LH} we are the master and rabbit app is not running. This is a failure"
+ exit $OCF_FAILED_MASTER
fi
else
- if [ "$OCF_CHECK_LEVEL" -gt 20 ]; then
- ocf_log info "${LH} rabbit app is not running. checking if there is a master"
- # Do not refetch the master status as we know it already
- if [ $rc -eq $OCF_RUNNING_MASTER ]; then
- ocf_log err "${LH} we are the master and rabbit app is not running. this is a failure"
- exit $OCF_FAILED_MASTER
- fi
-
- local master_name=$(get_master_name_but $THIS_PCMK_NODE)
-
- if [ -n "$master_name" ]; then
- ocf_log info "${LH} master exists and rabbit app is not running. Exiting to be restarted by pacemaker"
+ start_time=$((180 + $(ocf_get_private_attr 'rabbit-start-phase-1-time' 0)))
+ restart_order_time=$((60 + $(ocf_get_private_attr 'rabbit-ordered-to-restart' 0)))
+ nowtime=$(now)
+
+ # If we started more than 3 minutes ago, and
+ # we got order to restart less than 1 minute ago
+ if [ $nowtime -lt $restart_order_time ]; then
+ if [ $nowtime -gt $start_time ]; then
+ ocf_log err "${LH} failing because we have received an order to restart from the master"
stop_server_process
rc=$OCF_ERR_GENERIC
+ else
+ ocf_log warn "${LH} received an order to restart from the master, ignoring it because we have just started"
fi
fi
fi
@@ -1491,45 +1478,37 @@ get_monitor() {
ocf_log info "${LH} ensuring this slave does not get promoted."
master_score 0
return $OCF_ERR_GENERIC
- elif [ $rc -ne $OCF_RUNNING_MASTER ] ; then
- ocf_log info "${LH} preparing to update master score for node"
- our_uptime=$(srv_uptime)
- nodelist=$(get_alive_pacemaker_nodes_but $THIS_PCMK_NODE)
- max=1
- for node in $nodelist
+ fi
+
+ # Recounting our master score
+ ocf_log info "${LH} preparing to update master score for node"
+ local our_start_time
+ local new_score
+ local node_start_time
+ local node_score
+
+ our_start_time=$(get_node_start_time $THIS_PCMK_NODE)
+
+ if [ $our_start_time -eq 0 ]; then
+ new_score=$MIN_MASTER_SCORE
+ else
+ new_score=$BEST_MASTER_SCORE
+ for node in $(get_alive_pacemaker_nodes_but $THIS_PCMK_NODE)
do
- node_start_time=`crm_attribute -N $node -l reboot --name 'rabbit-start-time' --query 2>/dev/null | awk '{print $3}' | awk -F "=" '{print $2}' | sed -e '/(null)/d'`
- if [ -z "${node_start_time}" -o "${node_start_time}" = "(null)" ] ; then
- node_uptime=0
- else
- node_uptime=$(( $(now) - ${node_start_time} ))
- fi
- ocf_log info "${LH} comparing our uptime (${our_uptime}) with $node (${node_uptime})"
- if [ ${our_uptime} -lt ${node_uptime} ]
- then
- max=1
- break
- else
- # When uptime is equal, accept the existing master - if any - as the oldest node
- is_master $node
- status_master=$?
- if [ $status_master -eq 0 ] ; then
- max=1
- ocf_log info "${LH} Found the oldest master node $node with uptime (${node_uptime})"
- break
- else
- max=0
- fi
+ node_start_time=$(get_node_start_time $node)
+ node_score=$(get_node_master_score $node)
+
+ ocf_log info "${LH} comparing us (start time: $our_start_time, score: $new_score) with $node (start time: $node_start_time, score: $node_score)"
+ if [ $node_start_time -ne 0 -a $node_score -ne 0 -a $node_start_time -lt $our_start_time ]; then
+ new_score=$((node_score - 10 < new_score ? node_score - 10 : new_score ))
fi
done
+ fi
-
- if [ $max -eq 0 ]
- then
- ocf_log info "${LH} we are the oldest node"
- master_score 1000
- fi
+ if [ "$new_score" -ne "$(get_node_master_score $THIS_PCMK_NODE)" ]; then
+ master_score $new_score
fi
+ ocf_log info "${LH} our start time is $our_start_time and score is $new_score"
# Skip all other checks if rabbit app is not running
if [ $rabbit_running -ne $OCF_SUCCESS ]; then
@@ -1616,13 +1595,13 @@ get_monitor() {
elif [ -n "${queues}" ]; then
local q_c
- q_c=`printf "%b\n" "${queues}" | wc -l`
+ q_c=`printf %b "${queues}\n" | wc -l`
local mem
- mem=`printf "%b\n" "${queues}" | awk -v sum=0 '{sum+=$1} END {print (sum/1048576)}'`
+ mem=`printf %b "${queues}\n" | awk -v sum=0 '{sum+=$1} END {print (sum/1048576)}'`
local mes
- mes=`printf "%b\n" "${queues}" | awk -v sum=0 '{sum+=$2} END {print sum}'`
+ mes=`printf %b "${queues}\n" | awk -v sum=0 '{sum+=$2} END {print sum}'`
local c_u
- c_u=`printf "%b\n" "${queues}" | awk -v sum=0 -v cnt=${q_c} '{sum+=$3} END {print (sum+1)/(cnt+1)}'`
+ c_u=`printf %b "${queues}\n" | awk -v sum=0 -v cnt=${q_c} '{sum+=$3} END {print (sum+1)/(cnt+1)}'`
local status
status=`echo $(su_rabbit_cmd "${OCF_RESKEY_ctl} -q status")`
ocf_log info "${LH} RabbitMQ is running ${q_c} queues consuming ${mem}m of ${TOTALVMEM}m total, with ${mes} queued messages, average consumer utilization ${c_u}"
@@ -1630,14 +1609,41 @@ get_monitor() {
fi
fi
+ # If we are the master and healthy, check that we see other cluster members
+ # Order a member to restart if we don't see it
+ if [ $rc -eq $OCF_RUNNING_MASTER ] ; then
+ for node in $(get_all_pacemaker_nodes); do
+ if ! is_clustered_with $node; then
+ nowtime=$(now)
+
+ ocf_log warn "${LH} node $node is not connected with us, ordering it to restart."
+ ocf_update_private_attr 'rabbit-ordered-to-restart' "$nowtime" "$node"
+ fi
+ done
+ fi
+
ocf_log info "${LH} get_monitor function ready to return ${rc}"
return $rc
}
+ocf_get_private_attr() {
+ local attr_name="${1:?}"
+ local attr_default_value="${2:?}"
+ local nodename="${3:-$THIS_PCMK_NODE}"
+ local count
+ count=$(attrd_updater -p --name "$attr_name" --node "$nodename" --query)
+ if [ $? -ne 0 ]; then
+ echo $attr_default_value
+ else
+ echo "$count" | awk -vdef_val="$attr_default_value" '{ gsub(/"/, "", $3); split($3, vals, "="); if (vals[2] != "") print vals[2]; else print def_val }'
+ fi
+}
+
ocf_update_private_attr() {
local attr_name="${1:?}"
local attr_value="${2:?}"
- ocf_run attrd_updater -p --name "$attr_name" --update "$attr_value"
+ local nodename="${3:-$THIS_PCMK_NODE}"
+ ocf_run attrd_updater -p --name "$attr_name" --node "$nodename" --update "$attr_value"
}
rabbitmqctl_with_timeout_check() {
@@ -1687,6 +1693,7 @@ action_monitor() {
action_start() {
local rc=$OCF_ERR_GENERIC
local LH="${LL} start:"
+ local nowtime
if [ "${OCF_RESKEY_debug}" = 'true' ] ; then
d=`date '+%Y%m%d %H:%M:%S'`
@@ -1710,6 +1717,9 @@ action_start() {
ocf_update_private_attr $attr_name_to_reset 0
done
+ nowtime=$(now)
+ ocf_log info "${LH} Setting phase 1 one start time to $nowtime"
+ ocf_update_private_attr 'rabbit-start-phase-1-time' "$nowtime"
ocf_log info "${LH} Deleting start time attribute"
ocf_run crm_attribute -N $THIS_PCMK_NODE -l reboot --name 'rabbit-start-time' --delete
ocf_log info "${LH} Deleting master attribute"
@@ -1915,7 +1925,6 @@ action_notify() {
local rc2=$OCF_ERR_GENERIC
local LH="${LL} notify:"
local nodelist
- local nowtime
if [ "${OCF_RESKEY_debug}" = 'true' ] ; then
d=`date '+%Y%m%d %H:%M:%S'`
@@ -1924,28 +1933,6 @@ action_notify() {
echo "$d [notify] ${OCF_RESKEY_CRM_meta_notify_type}-${OCF_RESKEY_CRM_meta_notify_operation} promote='${OCF_RESKEY_CRM_meta_notify_promote_uname}' demote='${OCF_RESKEY_CRM_meta_notify_demote_uname}' master='${OCF_RESKEY_CRM_meta_notify_master_uname}' slave='${OCF_RESKEY_CRM_meta_notify_slave_uname}' start='${OCF_RESKEY_CRM_meta_notify_start_uname}' stop='${OCF_RESKEY_CRM_meta_notify_stop_uname}' active='${OCF_RESKEY_CRM_meta_notify_active_uname}' inactive='${OCF_RESKEY_CRM_meta_notify_inactive_uname}'" >> /tmp/rmq-ocf.log
fi
- if [ "${OCF_RESKEY_CRM_meta_notify_type}" = 'pre' ] ; then
- # PRE- anything notify section
- case "$OCF_RESKEY_CRM_meta_notify_operation" in
- promote)
- ocf_log info "${LH} pre-promote begin."
- my_host "$OCF_RESKEY_CRM_meta_notify_promote_uname"
- rc=$?
- if [ $rc -eq $OCF_SUCCESS ] ; then
- nodelist=$(get_all_pacemaker_nodes)
- for i in $nodelist
- do
- ocf_log info "${LH} Deleting master attribute for node ${i}"
- ocf_run crm_attribute -N $i -l reboot --name 'rabbit-master' --delete
- done
- ocf_log info "${LH} pre-promote end."
- fi
- ;;
- *)
- ;;
- esac
- fi
-
if [ "${OCF_RESKEY_CRM_meta_notify_type}" = 'post' ] ; then
# POST- anything notify section
case "$OCF_RESKEY_CRM_meta_notify_operation" in
@@ -1963,7 +1950,15 @@ action_notify() {
ocf_log info "${LH} ignoring post-promote of self"
elif is_clustered_with "${OCF_RESKEY_CRM_meta_notify_promote_uname}"; then
- ocf_log info "${LH} we are already clustered with master - ${OCF_RESKEY_CRM_meta_notify_promote_uname}. Nothing to do."
+ if get_status rabbit; then
+ ocf_log info "${LH} we are already clustered with master - ${OCF_RESKEY_CRM_meta_notify_promote_uname}. Nothing to do."
+ else
+ ocf_log info "${LH} we are already clustered with master - ${OCF_RESKEY_CRM_meta_notify_promote_uname}. We only need to start the app."
+
+ try_to_start_rmq_app
+ rc2=$?
+ update_rabbit_start_time_if_rc $rc2
+ fi
else
# Note, this should fail when the mnesia is inconsistent.
@@ -2012,14 +2007,10 @@ action_notify() {
rc2=$?
else
ocf_log warn "${LH} We are already clustered with node ${OCF_RESKEY_CRM_meta_notify_master_uname}"
- if try_to_start_rmq_app; then
- rc2=$OCF_SUCCESS
- nowtime="$(now)"
- ocf_log info "${LH} Updating start time attribute with ${nowtime}"
- ocf_run crm_attribute -N $THIS_PCMK_NODE -l reboot --name 'rabbit-start-time' --update "${nowtime}"
- else
- rc2=$OCF_ERR_GENERIC
- fi
+
+ try_to_start_rmq_app
+ rc2=$?
+ update_rabbit_start_time_if_rc $rc2
fi
ocf_log info "${LH} post-start end."
if [ -s "${OCF_RESKEY_definitions_dump_file}" ] ; then
@@ -2064,42 +2055,6 @@ action_notify() {
# always returns OCF_SUCCESS
ocf_log info "${LH} post-stop end."
;;
- demote)
- # if rabbitmq-server stops on any another node, we should remove it from cluster (as ordinary operation)
- ocf_log info "${LH} post-demote begin."
- # Report not running, if the list of nodes being demoted reported empty
- if [ -z "${OCF_RESKEY_CRM_meta_notify_demote_uname}" ] ; then
- ocf_log warn "${LH} there are no nodes being demoted reported on post-demote. The resource will be restarted."
- ocf_log info "${LH} post-demote end."
- return $OCF_ERR_GENERIC
- fi
- my_host "${OCF_RESKEY_CRM_meta_notify_demote_uname}"
- rc=$?
- if [ $rc -ne $OCF_SUCCESS ] ; then
- # On ohter nodes processing the post-demote, make sure the demoted node will be forgotten
- unjoin_nodes_from_cluster "${OCF_RESKEY_CRM_meta_notify_demote_uname}"
- else
- # Wait for synced state first
- ocf_log info "${LH} waiting $((OCF_RESKEY_stop_time/2)) to sync"
- wait_sync $((OCF_RESKEY_stop_time/2))
- # On the nodes being demoted, reset the master score
- ocf_log info "${LH} resetting the master score."
- master_score 0
- ocf_log info "${LH} Deleting start time attribute"
- ocf_run crm_attribute -N $THIS_PCMK_NODE -l reboot --name 'rabbit-start-time' --delete
- ocf_log info "${LH} Deleting master attribute"
- ocf_run crm_attribute -N $THIS_PCMK_NODE -l reboot --name 'rabbit-master' --delete
- ocf_log info "${LH} master was demoted. stopping RabbitMQ app."
- stop_rmq_server_app
- rc2=$?
- if [ $rc2 -ne $OCF_SUCCESS ] ; then
- ocf_log err "${LH} RMQ-server app can't be stopped on post-demote. Master resource is failed"
- ocf_log info "${LH} post-demote end."
- exit $OCF_FAILED_MASTER
- fi
- fi
- ocf_log info "${LH} post-demote end."
- ;;
*) ;;
esac
fi
@@ -2111,7 +2066,6 @@ action_notify() {
action_promote() {
local rc=$OCF_ERR_GENERIC
local LH="${LL} promote:"
- local nowtime
if [ "${OCF_RESKEY_debug}" = 'true' ] ; then
d=$(date '+%Y%m%d %H:%M:%S')
@@ -2149,10 +2103,8 @@ action_promote() {
[ -f "${OCF_RESKEY_policy_file}" ] && . "${OCF_RESKEY_policy_file}"
- # create timestamp file
- nowtime="$(now)"
- ocf_log info "${LH} Updating start timestamp with ${nowtime}"
- ocf_run crm_attribute -N $THIS_PCMK_NODE -l reboot --name 'rabbit-start-time' --update "${nowtime}"
+ update_rabbit_start_time_if_rc $rc
+
ocf_log info "${LH} Checking master status"
get_monitor
rc=$?
@@ -2206,68 +2158,11 @@ action_promote() {
action_demote() {
- local rc=$OCF_ERR_GENERIC
local LH="${LL} demote:"
-
- if [ "${OCF_RESKEY_debug}" = 'true' ] ; then
- d=`date '+%Y%m%d %H:%M:%S'`
- echo $d >> /tmp/rmq-demote.log
- env >> /tmp/rmq-demote.log
- echo "$d [demote] start='${OCF_RESKEY_CRM_meta_notify_start_uname}' stop='${OCF_RESKEY_CRM_meta_notify_stop_uname}' active='${OCF_RESKEY_CRM_meta_notify_active_uname}' inactive='${OCF_RESKEY_CRM_meta_notify_inactive_uname}'" >> /tmp/rmq-ocf.log
-
- fi
-
ocf_log info "${LH} action begin."
-
- get_monitor
- rc=$?
- case "$rc" in
- "$OCF_RUNNING_MASTER")
- # Running as master. Normal, expected behavior.
- ocf_log warn "${LH} Resource is currently running as Master"
- ocf_log info "${LH} Deleting master attribute"
- ocf_run crm_attribute -N $THIS_PCMK_NODE -l reboot --name 'rabbit-master' --delete
- ocf_log info "${LH} Deleting start timestamp"
- ocf_run crm_attribute -N $THIS_PCMK_NODE -l reboot --name 'rabbit-start-time' --delete
-
- # Wait for synced state first
- ocf_log info "${LH} waiting $((OCF_RESKEY_stop_time/2)) to sync"
- wait_sync $((OCF_RESKEY_stop_time/2))
-
- stop_rmq_server_app
- rc=$?
- ;;
- "$OCF_SUCCESS")
- # Alread running as slave. Nothing to do.
- ocf_log warn "${LH} Resource is currently running as Slave"
- rc=$OCF_SUCCESS
- ;;
- "$OCF_FAILED_MASTER")
- # Master failed and being demoted.
- ocf_log err "${LH} Demoting of a failed Master."
- ocf_log info "${LH} action end."
- exit $OCF_FAILED_MASTER
- ;;
- "$OCF_NOT_RUNNING")
- ocf_log warn "${LH} Try to demote currently not running resource. Nothing to do."
- rc=$OCF_SUCCESS
- ;;
- "$OCF_ERR_GENERIC")
- ocf_log err "${LH} Error while demote. Stopping resource."
- action_stop
- rc=$?
- ;;
- *)
- # Failed resource. Let the cluster manager recover.
- ocf_log err "${LH} Unexpected error, cannot demote"
- ocf_log info "${LH} action end."
- exit $rc
- ;;
- esac
-
- # transform master RMQ-server to slave
+ ocf_run crm_attribute -N $THIS_PCMK_NODE -l reboot --name 'rabbit-master' --delete
ocf_log info "${LH} action end."
- return $rc
+ return $OCF_SUCCESS
}
#######################################################################
diff --git a/src/gm.erl b/src/gm.erl
index 985b5bfa6d..0b5c1c44c4 100644
--- a/src/gm.erl
+++ b/src/gm.erl
@@ -760,6 +760,9 @@ handle_info({'DOWN', MRef, process, _Pid, Reason},
end;
handle_info(_, State) ->
%% Discard any unexpected messages, such as late replies from neighbour_call/2
+ %% TODO: For #gm_group{} related info messages, it could be worthwhile to
+ %% change_view/2, as this might reflect an alteration in the gm group, meaning
+ %% we now need to update our state. see rabbitmq-server#914.
noreply(State).
terminate(Reason, #state { module = Module, callback_args = Args }) ->
@@ -1596,7 +1599,9 @@ check_membership(Self, #gm_group{members = M} = Group) ->
Group;
false ->
throw(lost_membership)
- end.
+ end;
+check_membership(_Self, {error, not_found}) ->
+ throw(lost_membership).
check_membership(GroupName) ->
case dirty_read_group(GroupName) of
diff --git a/src/rabbit_mirror_queue_coordinator.erl b/src/rabbit_mirror_queue_coordinator.erl
index 221f11f18a..017d1d1fa2 100644
--- a/src/rabbit_mirror_queue_coordinator.erl
+++ b/src/rabbit_mirror_queue_coordinator.erl
@@ -355,6 +355,15 @@ handle_cast({gm_deaths, DeadGMPids},
DeadPids),
rabbit_mirror_queue_misc:add_mirrors(QueueName, ExtraNodes, async),
noreply(State);
+ {ok, _MPid0, DeadPids, _ExtraNodes} ->
+ %% see rabbitmq-server#914;
+ %% Different slave is now master, stop current coordinator normally.
+ %% Initiating queue is now slave and the least we could do is report
+ %% deaths which we 'think' we saw.
+ %% NOTE: Reported deaths here, could be inconsistant.
+ rabbit_mirror_queue_misc:report_deaths(MPid, false, QueueName,
+ DeadPids),
+ {stop, normal, State};
{error, not_found} ->
{stop, normal, State}
end;
diff --git a/src/rabbit_mirror_queue_misc.erl b/src/rabbit_mirror_queue_misc.erl
index 35ee87937e..59522da4a9 100644
--- a/src/rabbit_mirror_queue_misc.erl
+++ b/src/rabbit_mirror_queue_misc.erl
@@ -78,7 +78,7 @@ remove_from_queue(QueueName, Self, DeadGMPids) ->
rabbit_misc:execute_mnesia_transaction(
fun () ->
%% Someone else could have deleted the queue before we
- %% get here.
+ %% get here. Or, gm group could've altered. see rabbitmq-server#914
case mnesia:read({rabbit_queue, QueueName}) of
[] -> {error, not_found};
[Q = #amqqueue { pid = QPid,
@@ -92,7 +92,16 @@ remove_from_queue(QueueName, Self, DeadGMPids) ->
AlivePids = [Pid || {_GM, Pid} <- AliveGM],
Alive = [Pid || Pid <- [QPid | SPids],
lists:member(Pid, AlivePids)],
- {QPid1, SPids1} = promote_slave(Alive),
+ {QPid1, SPids1} = case Alive of
+ [] ->
+ %% GM altered, & if all pids are
+ %% perceived as dead, rather do
+ %% do nothing here, & trust the
+ %% promoted slave to have updated
+ %% mnesia during the alteration.
+ {QPid, SPids};
+ _ -> promote_slave(Alive)
+ end,
Extra =
case {{QPid, SPids}, {QPid1, SPids1}} of
{Same, Same} ->
@@ -100,7 +109,8 @@ remove_from_queue(QueueName, Self, DeadGMPids) ->
_ when QPid =:= QPid1 orelse QPid1 =:= Self ->
%% Either master hasn't changed, so
%% we're ok to update mnesia; or we have
- %% become the master.
+ %% become the master. If gm altered,
+ %% we have no choice but to proceed.
Q1 = Q#amqqueue{pid = QPid1,
slave_pids = SPids1,
gm_pids = AliveGM},
diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl
index ec27eaf68a..12b13c36fd 100644
--- a/src/rabbit_mirror_queue_slave.erl
+++ b/src/rabbit_mirror_queue_slave.erl
@@ -247,9 +247,15 @@ handle_call({gm_deaths, DeadGMPids}, From,
_ ->
%% master has changed to not us
gen_server2:reply(From, ok),
- %% assertion, we don't need to add_mirrors/2 in this
- %% branch, see last clause in remove_from_queue/2
- [] = ExtraNodes,
+ %% see rabbitmq-server#914;
+ %% It's not always guaranteed that we won't have ExtraNodes.
+ %% If gm alters, master can change to not us with extra nodes,
+ %% in which case we attempt to add mirrors on those nodes.
+ case ExtraNodes of
+ [] -> void;
+ _ -> rabbit_mirror_queue_misc:add_mirrors(
+ QName, ExtraNodes, async)
+ end,
%% Since GM is by nature lazy we need to make sure
%% there is some traffic when a master dies, to
%% make sure all slaves get informed of the
diff --git a/test/gm_SUITE.erl b/test/gm_SUITE.erl
index e4c68a257a..8b07c9efad 100644
--- a/test/gm_SUITE.erl
+++ b/test/gm_SUITE.erl
@@ -39,7 +39,8 @@ all() ->
confirmed_broadcast,
member_death,
receive_in_order,
- unexpected_msg
+ unexpected_msg,
+ down_in_members_change
].
init_per_suite(Config) ->
@@ -123,6 +124,41 @@ unexpected_msg(_Config) ->
passed
end).
+down_in_members_change(_Config) ->
+ %% Setup
+ ok = gm:create_tables(),
+ {ok, Pid} = gm:start_link(?MODULE, ?MODULE, self(),
+ fun rabbit_misc:execute_mnesia_transaction/1),
+ passed = receive_joined(Pid, [Pid], timeout_joining_gm_group_1),
+ {ok, Pid2} = gm:start_link(?MODULE, ?MODULE, self(),
+ fun rabbit_misc:execute_mnesia_transaction/1),
+ passed = receive_joined(Pid2, [Pid, Pid2], timeout_joining_gm_group_2),
+ passed = receive_birth(Pid, Pid2, timeout_waiting_for_birth_2),
+
+ %% Test. Simulate that the gm group is deleted (forget_group) while
+ %% processing the 'DOWN' message from the neighbour
+ process_flag(trap_exit, true),
+ ok = meck:new(mnesia, [passthrough]),
+ ok = meck:expect(mnesia, read, fun({gm_group, ?MODULE}) ->
+ [];
+ (Key) ->
+ meck:passthrough([Key])
+ end),
+ gm:leave(Pid2),
+ Passed = receive
+ {'EXIT', Pid, normal} ->
+ passed;
+ {'EXIT', Pid, _} ->
+ crashed
+ after 15000 ->
+ timeout
+ end,
+ %% Cleanup
+ meck:unload(mnesia),
+ process_flag(trap_exit, false),
+ passed = Passed.
+
+
do_broadcast(Fun) ->
with_two_members(broadcast_fun(Fun)).