diff --git a/pubsubplus/Chart.yaml b/pubsubplus/Chart.yaml
index 8d5fb64..3169fe1 100644
--- a/pubsubplus/Chart.yaml
+++ b/pubsubplus/Chart.yaml
@@ -1,8 +1,8 @@
apiVersion: v2
description: Deploy Solace PubSub+ Event Broker Singleton or HA redundancy group onto a Kubernetes Cluster
name: pubsubplus
-version: 3.3.1
-icon: https://solaceproducts.github.io/pubsubplus-kubernetes-quickstart/images/PubSubPlus.png
+version: 3.3.2
+icon: https://solaceproducts.github.io/pubsubplus-kubernetes-helm-quickstart/images/PubSubPlus.png
kubeVersion: '>= 1.10.0-0'
maintainers:
- name: Solace Community Forum
diff --git a/pubsubplus/templates/solaceConfigMap.yaml b/pubsubplus/templates/solaceConfigMap.yaml
index 0540430..bb3dcf3 100644
--- a/pubsubplus/templates/solaceConfigMap.yaml
+++ b/pubsubplus/templates/solaceConfigMap.yaml
@@ -37,10 +37,9 @@ data:
cat /mnt/disks/certs/server/{{.Values.tls.certFilename | default "tls.key"}} /mnt/disks/certs/server/{{.Values.tls.certKeyFilename | default "tls.crt"}} > /dev/shm/server.cert
export tls_servercertificate_filepath="/dev/shm/server.cert"
{{- end }}
+ # Deal with the fact we cannot accept "-" in router names
+ export routername=$(echo $(hostname) | sed 's/-//g')
{{- if .Values.solace.redundancy }}
- # [TODO] KBARR not using correct method of finding ordinal until we bump min Kubernetes release above 1.8.1
- # https://github.com/kubernetes/kubernetes/issues/40651
- # node_ordinal=$(STATEFULSET_ORDINAL)
IFS='-' read -ra host_array <<< $(hostname)
node_ordinal=${host_array[-1]}
if [[ ! -z `echo $STATEFULSET_NAMESPACE` ]]; then
@@ -49,9 +48,7 @@ data:
namespace=default
fi
service={{ template "solace.fullname" . }}
- # Deal with the fact we cannot accept "-" in routre names
service_name=$(echo ${service} | sed 's/-//g')
- export routername=$(echo $(hostname) | sed 's/-//g')
export redundancy_enable=yes
export configsync_enable=yes
export redundancy_authentication_presharedkey_key=`cat /mnt/disks/secrets/username_admin_password | awk '{x=$0;for(i=length;i<51;i++)x=x "0";}END{print x}' | base64` # Right-pad with 0s to 50 length
@@ -92,6 +89,7 @@ data:
loop_guard=60
pause=10
count=0
+ # Wait for Solace Management API
while [ ${count} -lt ${loop_guard} ]; do
if /mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 -t ; then
break
@@ -131,6 +129,7 @@ data:
resync_step_required=""
role=""
count=0
+ # Determine node's primary or backup role
while [ ${count} -lt ${loop_guard} ]; do
role_results=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \
-q "" \
@@ -147,16 +146,16 @@ data:
;;
esac
((count++))
- echo "`date` INFO: ${APP}-Waited ${run_time} seconds, got ${role_results} for this node's active-standby role"
+ echo "`date` INFO: ${APP}-Waited ${run_time} seconds, got ${role_results} for this node's primary or backup role"
sleep ${pause}
done
if [ ${count} -eq ${loop_guard} ]; then
- echo "`date` ERROR: ${APP}-Could not determine this node's active-standby role" >&2
+ echo "`date` ERROR: ${APP}-Could not determine this node's primary or backup role" >&2
exit 1
fi
- # Determine local activity
+ echo "`date` INFO: ${APP}-Management API is up, determined that this node's role is: ${role}"
+ # Determine activity (local or mate active)
count=0
- echo "`date` INFO: ${APP}-Management API is up, determined that this node's active-standby role is: ${role}"
while [ ${count} -lt ${loop_guard} ]; do
online_results=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \
-q "" \
@@ -172,7 +171,7 @@ data:
echo "`date` INFO: ${APP}-Broker initial startup detected. This node will assert config-sync configuration over its mate"
resync_step_required="true"
else
- echo "`date` WARN: ${APP}-Unexpected state: this is not an initial startup of the broker and this node reports Local Active. Normally expected nodes are Mate Active after restart"
+ echo "`date` WARN: ${APP}-Unexpected state: this is not an initial startup of the broker and this node reports Local Active. Possibly a redeploy?"
fi
break
;;
@@ -182,15 +181,16 @@ data:
;;
esac
((count++))
- echo "`date` INFO: ${APP}-Waited ${run_time} seconds, Local activity state is: ${local_activity}"
+ echo "`date` INFO: ${APP}-Waited ${run_time} seconds, node activity state is: ${local_activity}"
sleep ${pause}
done
if [ ${count} -eq ${loop_guard} ]; then
- echo "`date` ERROR: ${APP}-Local activity state never become Local Active or Mate Active" >&2
+ echo "`date` ERROR: ${APP}-Node activity state never become Local Active or Mate Active" >&2
exit 1
fi
- # If we need to assert leader, then we need to wait for mate to reconcile
+ # If we need to assert leader, then first wait for mate to report Standby state
if [ "${resync_step_required}" = "true" ]; then
+ # This branch is AD-active only
count=0
echo "`date` INFO: ${APP}-Waiting for mate activity state to be 'Standby'"
while [ ${count} -lt ${loop_guard} ]; do
@@ -214,7 +214,7 @@ data:
exit 1
fi
fi # if assert-leader
- # Ensure Config-sync connection state is Connected before proceeding
+ # Ensure Config-sync connection state is Connected for both primary and backup before proceeding
count=0
echo "`date` INFO: ${APP}-Waiting for config-sync connected"
while [ ${count} -lt ${loop_guard} ]; do
@@ -239,11 +239,12 @@ data:
fi
# Now can issue assert-leader command
if [ "${resync_step_required}" = "true" ]; then
- echo "`date` INFO: ${APP}-Initiating assert-leader"
- /mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \
- -q ""
- /mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \
- -q "*"
+ # This branch is AD-active only
+ echo "`date` INFO: ${APP}-Initiating assert-leader"
+ /mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \
+ -q ""
+ /mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \
+ -q "*"
fi
# Wait for config-sync results
count=0
@@ -263,7 +264,7 @@ data:
((count++))
echo "`date` INFO: ${APP}-Waited ${run_time} seconds, Config-sync is: ${confsyncstatus_results}, not yet Up"
- # Additional check to confirm config-sync
+ # Additional checks to confirm config-sync (even if reported globally as not Up, it may still be up between the local primary and backup in a DR setup)
echo "`date` INFO: ${APP}-Checking Config-sync Setup. Starting additional checks to confirm config-sync locally..."
messagevpn_result=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \
-q "" \
@@ -378,36 +379,15 @@ data:
IFS='-' read -ra host_array <<< $(hostname)
node_ordinal=${host_array[-1]}
password=`cat /mnt/disks/secrets/username_admin_password`
-
- # For update (includes SolOS upgrade) purposes, additional checks are required for readiness state when the pod has been started
- # This is an update if the LASTVERSION_FILE with K8s controller-revision-hash exists and contents differ from current value
- LASTVERSION_FILE=/var/lib/solace/var/lastConfigRevisionBeforeReboot
- if [ ! -f ${LASTVERSION_FILE} ] || [[ $(cat ${LASTVERSION_FILE}) != $(get_label "controller-revision-hash") ]] ; then
- echo "`date` INFO: ${APP}-Initial startup or Upgrade detected, running additional checks..."
- # Check redundancy
- echo "`date` INFO: ${APP}-Running checks. Redundancy state check started..."
- results=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \
- -q "" \
- -v "/rpc-reply/rpc/show/redundancy/redundancy-status"`
- redundancystatus_results=`echo ${results} | xmllint -xpath "string(returnInfo/valueSearchResult)" -`
- if [ "${redundancystatus_results}" != "Up" ]; then
- echo "`date` INFO: ${APP}-Redundancy state is not yet up."
- rm -f ${FINAL_ACTIVITY_LOGGED_TRACKING_FILE}; exit 1
- fi
-
- fi
- # Record current version in LASTVERSION_FILE
- echo $(get_label "controller-revision-hash") > ${LASTVERSION_FILE}
# For monitor node just check for redundancy; active label will never be set
if [ "${node_ordinal}" = "2" ]; then
# Check redundancy
- echo "`date` INFO: ${APP}-Running checks. Redundancy state check started..."
results=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \
-q "" \
-v "/rpc-reply/rpc/show/redundancy/redundancy-status"`
redundancystatus_results=`echo ${results} | xmllint -xpath "string(returnInfo/valueSearchResult)" -`
if [ "${redundancystatus_results}" != "Up" ]; then
- echo "`date` INFO: ${APP}-Redundancy state is not yet up."
+ echo "`date` INFO: ${APP}-Waiting for redundancy up, redundancy state is not yet up."
rm -f ${FINAL_ACTIVITY_LOGGED_TRACKING_FILE}; exit 1
fi
if [ ! -f ${FINAL_ACTIVITY_LOGGED_TRACKING_FILE} ]; then
@@ -418,6 +398,7 @@ data:
fi
exit 0
fi # End Monitor Node
+ # From here on, only message routing nodes (primary or backup) are handled.
# For Primary or Backup nodes set both service readiness (active label) and k8s readiness (exit return value)
health_result=`curl -s -o /dev/null -w "%{http_code}" http://localhost:5550/health-check/guaranteed-active`
case "${health_result}" in
@@ -467,54 +448,52 @@ data:
echo "`date` INFO: ${APP}-Running checks.Redundancy state is not yet up."
rm -f ${FINAL_ACTIVITY_LOGGED_TRACKING_FILE}; exit 1
fi
- # Additionally check config-sync status for non-monitoring nodes
- if [ "${node_ordinal}" != "2" ]; then
- results=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \
- -q "" \
- -v "/rpc-reply/rpc/show/config-sync/status/oper-status"`
- confsyncstatus_results=`echo ${results} | xmllint -xpath "string(returnInfo/valueSearchResult)" -`
- if [ "${confsyncstatus_results}" != "Up" ]; then
+ # Check config-sync status
+ results=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \
+ -q "" \
+ -v "/rpc-reply/rpc/show/config-sync/status/oper-status"`
+ confsyncstatus_results=`echo ${results} | xmllint -xpath "string(returnInfo/valueSearchResult)" -`
+ if [ "${confsyncstatus_results}" != "Up" ]; then
- # Additional check to confirm config-sync
- echo "`date` INFO: ${APP}-Checking Config-sync Setup. Starting additional checks to confirm config-sync locally..."
+ # Additional check to confirm config-sync
+ echo "`date` INFO: ${APP}-Checking Config-sync Setup. Starting additional checks to confirm config-sync locally..."
- messagevpn_result=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \
- -q "" \
- -v "count(/rpc-reply/rpc/show/config-sync/database/local/tables/table)"`
- messagevpn_total=`echo ${messagevpn_result} | xmllint -xpath "string(returnInfo/valueSearchResult)" -`
+ messagevpn_result=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \
+ -q "" \
+ -v "count(/rpc-reply/rpc/show/config-sync/database/local/tables/table)"`
+ messagevpn_total=`echo ${messagevpn_result} | xmllint -xpath "string(returnInfo/valueSearchResult)" -`
- # Count message_vpns in-sync and compare with total
- localmessagevpn_result=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \
- -q "" \
- -v "count(//table[sync-state='In-Sync'])"`
- local_messagevpn_total_insync=`echo ${localmessagevpn_result} | xmllint -xpath "string(returnInfo/valueSearchResult)" -`
- if [ "$messagevpn_total" -ne "$local_messagevpn_total_insync" ]; then
- echo "`date` INFO: ${APP}-Config-sync state is not in-sync locally."
- rm -f ${FINAL_ACTIVITY_LOGGED_TRACKING_FILE}; exit 1
- fi
+ # Count message_vpns in-sync and compare with total
+ localmessagevpn_result=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \
+ -q "" \
+ -v "count(//table[sync-state='In-Sync'])"`
+ local_messagevpn_total_insync=`echo ${localmessagevpn_result} | xmllint -xpath "string(returnInfo/valueSearchResult)" -`
+ if [ "$messagevpn_total" -ne "$local_messagevpn_total_insync" ]; then
+ echo "`date` INFO: ${APP}-Config-sync state is not in-sync locally."
+ rm -f ${FINAL_ACTIVITY_LOGGED_TRACKING_FILE}; exit 1
+ fi
- echo "`date` INFO: ${APP}-Checking Config-sync Setup. Remote config-sync state check starting..."
- vpnremotehamate_result=$(get_router_remote_config_state "name")
+ echo "`date` INFO: ${APP}-Checking Config-sync Setup. Remote config-sync state check starting..."
+ vpnremotehamate_result=$(get_router_remote_config_state "name")
- remote_messagevpn_result=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \
- -q "" \
- -v "count(//table/source-router[name='$vpnremotehamate_result'])"`
- remote_messagevpn_total=`echo ${remote_messagevpn_result} | xmllint -xpath "string(returnInfo/valueSearchResult)" -`
+ remote_messagevpn_result=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \
+ -q "" \
+ -v "count(//table/source-router[name='$vpnremotehamate_result'])"`
+ remote_messagevpn_total=`echo ${remote_messagevpn_result} | xmllint -xpath "string(returnInfo/valueSearchResult)" -`
- #Count message_vpns in-sync, not stale and compare with total
- remotemessagevpn_result=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \
- -q "" \
- -v "count(//table/source-router[name='$vpnremotehamate_result' and sync-state='In-Sync' and stale='No'])"`
- remote_messagevpn_total_insync=`echo ${remotemessagevpn_result} | xmllint -xpath "string(returnInfo/valueSearchResult)" -`
- if [ "$remote_messagevpn_total" -ne "$remote_messagevpn_total_insync" ]; then
- echo "`date` INFO: ${APP}-Config-sync state is not in-sync for remote."
- rm -f ${FINAL_ACTIVITY_LOGGED_TRACKING_FILE}; exit 1
- fi
+ # Count message_vpns that are in-sync and not stale, and compare with total
+ remotemessagevpn_result=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \
+ -q "" \
+ -v "count(//table/source-router[name='$vpnremotehamate_result' and sync-state='In-Sync' and stale='No'])"`
+ remote_messagevpn_total_insync=`echo ${remotemessagevpn_result} | xmllint -xpath "string(returnInfo/valueSearchResult)" -`
+ if [ "$remote_messagevpn_total" -ne "$remote_messagevpn_total_insync" ]; then
+ echo "`date` INFO: ${APP}-Config-sync state is not in-sync for remote."
+ rm -f ${FINAL_ACTIVITY_LOGGED_TRACKING_FILE}; exit 1
fi
fi
# Pass readiness check
if [ ! -f ${FINAL_ACTIVITY_LOGGED_TRACKING_FILE} ]; then
- echo "`date` INFO: ${APP}-Redundancy is up and node is mate Active"
+ echo "`date` INFO: ${APP}-Redundancy is up and node is Mate Active"
touch ${FINAL_ACTIVITY_LOGGED_TRACKING_FILE}
echo "`date` INFO: ${APP}-Server status check complete for this broker node"
exit 1