#!/bin/bash
set -eux
set -o pipefail

# This node's own id, taken from the bootstrap_host metadata.
LOCAL_RABBIT_HOST="$(os-apply-config --key bootstrap_host.nodeid --type netaddress --key-default '')"
# Node names are lowercased because RabbitMQ uses the local hostname, and the
# hostname is lowercased by cloud-init. Uppercase names in the metadata would
# therefore never match.
LOCAL_RABBIT_HOST="${LOCAL_RABBIT_HOST,,}"

# Split rabbit.nodes on commas (one name per line) and sort the result.
NODES="$(os-apply-config --key rabbit.nodes --type raw --key-default '' | tr ',' '\n' | sort)"
# Lowercase the sorted list, then rely on word splitting to build the array.
# shellcheck disable=SC2206
NODES=(${NODES,,})
TOTAL_NODES="${#NODES[@]}"



# Without our own node id there is insufficient meta-data to attempt to
# start-up RabbitMQ, so stop here.
[[ -n "${LOCAL_RABBIT_HOST}" ]] || {
    echo "RabbitMQ bootstrap_host.nodeid is not defined in meta-data, aborting."
    exit 255
}

# The service is enabled for both the single-node and the clustered set-up.
os-svc-enable -n rabbitmq-server

## Non-cluster configuration set-up. ##
# With at most one configured node a plain restart is all that is needed.
if [[ ${TOTAL_NODES} -le 1 ]]; then
    os-svc-restart -n rabbitmq-server
    echo "RabbitMQ non-cluster configuration complete..."
    exit 0
fi

## Cluster configuration set-up. ##
# Report whether the local RabbitMQ node is clustered with at least one peer.
# Arguments: none
# Returns:   0 if the list following "running_nodes" in `rabbitmqctl
#            cluster_status` contains at least two nodes; non-zero otherwise,
#            including when rabbitmqctl itself fails.
function is_in_cluster() {
    local status
    # Capture the whole report before matching. Piping straight into
    # `grep -q` lets grep exit on the first match while rabbitmqctl may still
    # be writing; under `set -o pipefail` the resulting broken-pipe failure
    # would turn a successful match into "not in a cluster".
    status="$(rabbitmqctl cluster_status 2>/dev/null)" || return 1
    # "[" followed by at least one non-"]" character and a "," means the
    # running_nodes list holds two or more entries.
    grep -q "running_nodes,\[[^]]\+," <<<"${status}"
}

# Number of running nodes in the cluster according to remote node $1.
# Arguments: $1 - host name of the node to query (contacted as rabbit@$1)
# Outputs:   the node count on stdout. If $1 isn't in a cluster, is in a
#            cluster by itself, or cannot be queried, this prints 0.
# Returns:   always 0, so that an unreachable node does not abort callers
#            running under `set -e` / `set -o pipefail`.
function cluster_size() {
    local remote_node="${1}"
    local status running separators

    # A failing rabbitmqctl (e.g. the remote node is down) counts as "no cluster".
    status="$(rabbitmqctl -n "rabbit@${remote_node}" cluster_status 2>/dev/null)" || status=""

    # Strip all whitespace first so the result does not depend on how the
    # running_nodes list was wrapped across lines, then cut out the list body.
    running="$(tr -d '[:space:]' <<<"${status}" |
        sed -n 's/.*{running_nodes,\[\([^]]*\)\]}.*/\1/p')"

    # No comma means zero or one running node, which is reported as 0.
    if [[ "${running}" != *,* ]]; then
        echo 0
        return 0
    fi

    # Entries are comma separated: node count = number of commas + 1.
    separators="${running//[^,]/}"
    echo $(( ${#separators} + 1 ))
}

# Take the local node out of its cluster.
# Runs `rabbitmqctl stop_app` followed by `rabbitmqctl reset`; the reset syncs
# all data into the cluster, then removes this node, cleaning local mnesia.
# Returns the status of the final rabbitmqctl call.
function leave_cluster() {
    local action
    for action in stop_app reset; do
        rabbitmqctl "${action}"
    done
}
# Exported so the function can be invoked from a child `bash -c`.
export -f leave_cluster

# Cluster the local node with remote node $1.
# Arguments: $1 - host name of the node to join (addressed as rabbit@$1)
#            $2 - local host name, only used in the failure message
# Returns:   0 if is_in_cluster succeeds after the attempt, 1 otherwise.
function join_cluster_with() {
    local peer="${1}"
    local self="${2}"

    rabbitmqctl stop_app
    # The join itself is best effort; its outcome is judged by the
    # is_in_cluster check once the app is running again.
    rabbitmqctl join_cluster "rabbit@${peer}" 2>/dev/null || true
    rabbitmqctl start_app

    if is_in_cluster; then
        return 0
    fi
    echo "Failed to join node [${self}] with [${peer}]..."
    return 1
}

# Node id of the bootstrap host from the metadata, lowercased.
BOOTSTRAP_NODE="$(os-apply-config --key bootstrap_host.bootstrap_nodeid --type netaddress --key-default '')"
BOOTSTRAP_NODE="${BOOTSTRAP_NODE,,}"

# Locate this host in the NODES array; NODE_INDEX stays empty when the host
# is not listed.
NODE_INDEX=""
for (( index = 0; index < TOTAL_NODES; index++ )); do
    [[ "${NODES[index]}" == "${LOCAL_RABBIT_HOST}" ]] || continue
    NODE_INDEX="${index}"
done

# Abort when any clustering prerequisite is missing:
#   - we do not know who the bootstrap node is, so there is nothing to
#     build a cluster around;
#   - there are fewer than three nodes, which is insufficient to support HA;
#   - our node was not found in NODES, hence NODE_INDEX was never set.
if [[ -z "${BOOTSTRAP_NODE}" || ${TOTAL_NODES} -lt 3 || -z "${NODE_INDEX}" ]]; then
    echo "bootstrap_host.bootstrap_nodeid: ${BOOTSTRAP_NODE}, TOTAL_NODES: ${TOTAL_NODES}, NODE_INDEX: ${NODE_INDEX}"
    echo "RabbitMQ cluster configuration prerequisites not met, aborting."
    exit 255
fi

# Ping every configured node once. All unreachable hosts are reported before
# the script gives up.
for node in "${NODES[@]}"; do
    if ping -c1 "${node}"; then
        continue
    fi
    echo "RabbitMQ host unreachable: ${node}"
    HOST_UNREACHABLE=1
done
if [[ -n "${HOST_UNREACHABLE:-}" ]]; then
    exit 1
fi

# Refuse to leave the cluster unless all nodes are running; this avoids
# triggering pause_minority.
# From the RabbitMQ docs: pause_minority
#     Your network is maybe less reliable. You have clustered across 3 AZs
#     in EC2, and you assume that only one AZ will fail at once. In that
#     scenario you want the remaining two AZs to continue working and the
#     nodes from the failed AZ to rejoin automatically and without fuss when
#     the AZ comes back.
# (See: os-apply-config/etc/rabbitmq/rabbitmq.config)
#
# We want to orchestrate nodes leaving the cluster. We'll do this using a
# metronome.  For example, if we have 3 nodes, there will be six periods.
# The first node may leave in period 0. The second node may leave in period
# 2. The third node may leave in period 4.
#
# Metronome:   0 .. 1 .. 2 .. 3 .. 4 .. 5 ..
# Node leaves: 0 ....... 1 ....... 2 .......
#
# The dead periods in between allow for $PERIOD seconds of clock
# desynchronization. PERIOD should be about half the length of time it
# takes for a node to join the cluster.
PERIOD=10
# Each node owns the even-numbered period at twice its index in NODES.
NODE_LEAVES_AT=$(( ${NODE_INDEX} * 2 ))
# Keep polling for as long as the local node is still part of a cluster.
while is_in_cluster; do
    # Ask the bootstrap node how many nodes it currently sees running.
    NODES_IN_CLUSTER=$(cluster_size "${BOOTSTRAP_NODE}")
    if [ ${NODES_IN_CLUSTER} -gt ${TOTAL_NODES} ]; then
        echo "A node we don't know about appears to have joined the cluster, aborting."
        exit 255
    fi

    # Current period number: wall-clock seconds divided into PERIOD-second
    # slots, wrapping around after two slots per node.
    METRONOME=$(( ($(date +%s) / ${PERIOD}) % (${TOTAL_NODES} * 2) ))
    if [ ${NODES_IN_CLUSTER} -eq ${TOTAL_NODES} -a \
         ${METRONOME} -eq ${NODE_LEAVES_AT} ]; then
        # All other nodes are in the cluster and it's our allotted time,
        # safe to leave. Tell other nodes we're about to leave the cluster.
        echo "Leaving cluster..."
        # leave_cluster runs in a child bash, bounded to 300 seconds by
        # timeout. If it fails or times out, start the app again and exit
        # with an error.
        timeout 300 bash -c leave_cluster || { rabbitmqctl start_app && exit 1; }
    else
        echo "Refusing to allow node to leave cluster..."
    fi
    # Poll several times per period so the allotted slot is not missed.
    sleep 2
done

# Restart RabbitMQ. This must only happen after the node has left the
# cluster; restarting while still clustered would risk data loss.
os-svc-restart -n rabbitmq-server

if [[ "${LOCAL_RABBIT_HOST}" != "${BOOTSTRAP_NODE}" ]]; then
    # Ordinary node: wait until the BOOTSTRAP_NODE has at least formed a
    # cluster with one node, then join through it unless already clustered.
    while [[ $(cluster_size "${BOOTSTRAP_NODE}") -lt 2 ]]; do
        echo "Waiting for bootstrap node to initialise the cluster..."
        sleep 10
    done
    if ! is_in_cluster; then
        join_cluster_with "${BOOTSTRAP_NODE}" "${LOCAL_RABBIT_HOST}"
    fi
else
    # We're the bootstrap node.
    # While we are not in a cluster, keep trying to join one of the nodes.
    # Note: This loop is required as the BOOTSTRAP_NODE may have left a
    #       running cluster and it therefore must re-join.
    until is_in_cluster; do
        # Walk round-robin over NODES, skipping our own slot.
        COUNT=$(( (${COUNT:-0} + 1) % TOTAL_NODES ))
        if [[ ${COUNT} -ne ${NODE_INDEX} ]]; then
            join_cluster_with "${NODES[COUNT]}" "${LOCAL_RABBIT_HOST}" || true
        fi
    done

    # Check that we have not got a partition, i.e. the case where clocks are
    # not synced and the cluster ends up split as A+B C. If that happens we
    # wait, as this is more favourable than a bad/broken cluster set-up.
    while [[ $(cluster_size "${LOCAL_RABBIT_HOST}") -ne ${TOTAL_NODES} ]]; do
        echo "Waiting for nodes to join [${BOOTSTRAP_NODE}]..."
        sleep 10
    done
fi

# Install the "HA" policy so that all queues (except those with
# auto-generated names) are mirrored across all nodes in the cluster.
# The pattern uses a negative lookahead to match every queue name that does
# not start with "amq."; matching queues get ha-mode "all".
rabbitmqctl set_policy HA '^(?!amq\.).*' '{"ha-mode": "all"}'

echo "RabbitMQ cluster configuration complete..."
