diff --git a/pubsubplus/Chart.yaml b/pubsubplus/Chart.yaml index 8d5fb64..3169fe1 100644 --- a/pubsubplus/Chart.yaml +++ b/pubsubplus/Chart.yaml @@ -1,8 +1,8 @@ apiVersion: v2 description: Deploy Solace PubSub+ Event Broker Singleton or HA redundancy group onto a Kubernetes Cluster name: pubsubplus -version: 3.3.1 -icon: https://solaceproducts.github.io/pubsubplus-kubernetes-quickstart/images/PubSubPlus.png +version: 3.3.2 +icon: https://solaceproducts.github.io/pubsubplus-kubernetes-helm-quickstart/images/PubSubPlus.png kubeVersion: '>= 1.10.0-0' maintainers: - name: Solace Community Forum diff --git a/pubsubplus/templates/solaceConfigMap.yaml b/pubsubplus/templates/solaceConfigMap.yaml index 0540430..bb3dcf3 100644 --- a/pubsubplus/templates/solaceConfigMap.yaml +++ b/pubsubplus/templates/solaceConfigMap.yaml @@ -37,10 +37,9 @@ data: cat /mnt/disks/certs/server/{{.Values.tls.certFilename | default "tls.key"}} /mnt/disks/certs/server/{{.Values.tls.certKeyFilename | default "tls.crt"}} > /dev/shm/server.cert export tls_servercertificate_filepath="/dev/shm/server.cert" {{- end }} + # Deal with the fact we cannot accept "-" in router names + export routername=$(echo $(hostname) | sed 's/-//g') {{- if .Values.solace.redundancy }} - # [TODO] KBARR not using correct method of finding ordinal until we bump min Kubernetes release above 1.8.1 - # https://github.com/kubernetes/kubernetes/issues/40651 - # node_ordinal=$(STATEFULSET_ORDINAL) IFS='-' read -ra host_array <<< $(hostname) node_ordinal=${host_array[-1]} if [[ ! -z `echo $STATEFULSET_NAMESPACE` ]]; then @@ -49,9 +48,7 @@ data: namespace=default fi service={{ template "solace.fullname" . 
}} - # Deal with the fact we cannot accept "-" in routre names service_name=$(echo ${service} | sed 's/-//g') - export routername=$(echo $(hostname) | sed 's/-//g') export redundancy_enable=yes export configsync_enable=yes export redundancy_authentication_presharedkey_key=`cat /mnt/disks/secrets/username_admin_password | awk '{x=$0;for(i=length;i<51;i++)x=x "0";}END{print x}' | base64` # Right-pad with 0s to 50 length @@ -92,6 +89,7 @@ data: loop_guard=60 pause=10 count=0 + # Wait for Solace Management API while [ ${count} -lt ${loop_guard} ]; do if /mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 -t ; then break @@ -131,6 +129,7 @@ data: resync_step_required="" role="" count=0 + # Determine node's primary or backup role while [ ${count} -lt ${loop_guard} ]; do role_results=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \ -q "" \ @@ -147,16 +146,16 @@ data: ;; esac ((count++)) - echo "`date` INFO: ${APP}-Waited ${run_time} seconds, got ${role_results} for this node's active-standby role" + echo "`date` INFO: ${APP}-Waited ${run_time} seconds, got ${role_results} for this node's primary or backup role" sleep ${pause} done if [ ${count} -eq ${loop_guard} ]; then - echo "`date` ERROR: ${APP}-Could not determine this node's active-standby role" >&2 + echo "`date` ERROR: ${APP}-Could not determine this node's primary or backup role" >&2 exit 1 fi - # Determine local activity + echo "`date` INFO: ${APP}-Management API is up, determined that this node's role is: ${role}" + # Determine activity (local or mate active) count=0 - echo "`date` INFO: ${APP}-Management API is up, determined that this node's active-standby role is: ${role}" while [ ${count} -lt ${loop_guard} ]; do online_results=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \ -q "" \ @@ -172,7 +171,7 @@ data: echo "`date` INFO: ${APP}-Broker initial startup detected. 
This node will assert config-sync configuration over its mate" resync_step_required="true" else - echo "`date` WARN: ${APP}-Unexpected state: this is not an initial startup of the broker and this node reports Local Active. Normally expected nodes are Mate Active after restart" + echo "`date` WARN: ${APP}-Unexpected state: this is not an initial startup of the broker and this node reports Local Active. Possibly a redeploy?" fi break ;; @@ -182,15 +181,16 @@ data: ;; esac ((count++)) - echo "`date` INFO: ${APP}-Waited ${run_time} seconds, Local activity state is: ${local_activity}" + echo "`date` INFO: ${APP}-Waited ${run_time} seconds, node activity state is: ${local_activity}" sleep ${pause} done if [ ${count} -eq ${loop_guard} ]; then - echo "`date` ERROR: ${APP}-Local activity state never become Local Active or Mate Active" >&2 + echo "`date` ERROR: ${APP}-Node activity state never become Local Active or Mate Active" >&2 exit 1 fi - # If we need to assert leader, then we need to wait for mate to reconcile + # If we need to assert leader, then first wait for mate to report Standby state if [ "${resync_step_required}" = "true" ]; then + # This branch is AD-active only count=0 echo "`date` INFO: ${APP}-Waiting for mate activity state to be 'Standby'" while [ ${count} -lt ${loop_guard} ]; do @@ -214,7 +214,7 @@ data: exit 1 fi fi # if assert-leader - # Ensure Config-sync connection state is Connected before proceeding + # Ensure Config-sync connection state is Connected for both primary and backup before proceeding count=0 echo "`date` INFO: ${APP}-Waiting for config-sync connected" while [ ${count} -lt ${loop_guard} ]; do @@ -239,11 +239,12 @@ data: fi # Now can issue assert-leader command if [ "${resync_step_required}" = "true" ]; then - echo "`date` INFO: ${APP}-Initiating assert-leader" - /mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \ - -q "" - /mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \ - 
-q "*" + # This branch is AD-active only + echo "`date` INFO: ${APP}-Initiating assert-leader" + /mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \ + -q "" + /mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \ + -q "*" fi # Wait for config-sync results count=0 @@ -263,7 +264,7 @@ data: ((count++)) echo "`date` INFO: ${APP}-Waited ${run_time} seconds, Config-sync is: ${confsyncstatus_results}, not yet Up" - # Additional check to confirm config-sync + # Additional checks to confirm config-sync (even if reported globally as not Up, it may still be up between local primary and backup in a DR setup) echo "`date` INFO: ${APP}-Checking Config-sync Setup. Starting additional checks to confirm config-sync locally..." messagevpn_result=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \ -q "" \ -v "count(/rpc-reply/rpc/show/config-sync/database/local/tables/table)"` messagevpn_total=`echo ${messagevpn_result} | xmllint -xpath "string(returnInfo/valueSearchResult)" -` @@ -378,36 +379,15 @@ data: IFS='-' read -ra host_array <<< $(hostname) node_ordinal=${host_array[-1]} password=`cat /mnt/disks/secrets/username_admin_password` - - # For update (includes SolOS upgrade) purposes, additional checks are required for readiness state when the pod has been started - # This is an update if the LASTVERSION_FILE with K8s controller-revision-hash exists and contents differ from current value - LASTVERSION_FILE=/var/lib/solace/var/lastConfigRevisionBeforeReboot - if [ ! -f ${LASTVERSION_FILE} ] || [[ $(cat ${LASTVERSION_FILE}) != $(get_label "controller-revision-hash") ]] ; then - echo "`date` INFO: ${APP}-Initial startup or Upgrade detected, running additional checks..." - # Check redundancy - echo "`date` INFO: ${APP}-Running checks. Redundancy state check started..." 
- results=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \ - -q "" \ - -v "/rpc-reply/rpc/show/redundancy/redundancy-status"` - redundancystatus_results=`echo ${results} | xmllint -xpath "string(returnInfo/valueSearchResult)" -` - if [ "${redundancystatus_results}" != "Up" ]; then - echo "`date` INFO: ${APP}-Redundancy state is not yet up." - rm -f ${FINAL_ACTIVITY_LOGGED_TRACKING_FILE}; exit 1 - fi - - fi - # Record current version in LASTVERSION_FILE - echo $(get_label "controller-revision-hash") > ${LASTVERSION_FILE} # For monitor node just check for redundancy; active label will never be set if [ "${node_ordinal}" = "2" ]; then # Check redundancy - echo "`date` INFO: ${APP}-Running checks. Redundancy state check started..." results=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \ -q "" \ -v "/rpc-reply/rpc/show/redundancy/redundancy-status"` redundancystatus_results=`echo ${results} | xmllint -xpath "string(returnInfo/valueSearchResult)" -` if [ "${redundancystatus_results}" != "Up" ]; then - echo "`date` INFO: ${APP}-Redundancy state is not yet up." + echo "`date` INFO: ${APP}-Waiting for redundancy up, redundancy state is not yet up." rm -f ${FINAL_ACTIVITY_LOGGED_TRACKING_FILE}; exit 1 fi if [ ! -f ${FINAL_ACTIVITY_LOGGED_TRACKING_FILE} ]; then @@ -418,6 +398,7 @@ data: fi exit 0 fi # End Monitor Node + # From here only message routing nodes. # For Primary or Backup nodes set both service readiness (active label) and k8s readiness (exit return value) health_result=`curl -s -o /dev/null -w "%{http_code}" http://localhost:5550/health-check/guaranteed-active` case "${health_result}" in @@ -467,54 +448,52 @@ data: echo "`date` INFO: ${APP}-Running checks.Redundancy state is not yet up." 
rm -f ${FINAL_ACTIVITY_LOGGED_TRACKING_FILE}; exit 1 fi - # Additionally check config-sync status for non-monitoring nodes - if [ "${node_ordinal}" != "2" ]; then - results=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \ - -q "" \ - -v "/rpc-reply/rpc/show/config-sync/status/oper-status"` - confsyncstatus_results=`echo ${results} | xmllint -xpath "string(returnInfo/valueSearchResult)" -` - if [ "${confsyncstatus_results}" != "Up" ]; then + # Check config-sync status + results=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \ + -q "" \ + -v "/rpc-reply/rpc/show/config-sync/status/oper-status"` + confsyncstatus_results=`echo ${results} | xmllint -xpath "string(returnInfo/valueSearchResult)" -` + if [ "${confsyncstatus_results}" != "Up" ]; then - # Additional check to confirm config-sync - echo "`date` INFO: ${APP}-Checking Config-sync Setup. Starting additional checks to confirm config-sync locally..." + # Additional check to confirm config-sync + echo "`date` INFO: ${APP}-Checking Config-sync Setup. Starting additional checks to confirm config-sync locally..." 
- messagevpn_result=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \ - -q "" \ - -v "count(/rpc-reply/rpc/show/config-sync/database/local/tables/table)"` - messagevpn_total=`echo ${messagevpn_result} | xmllint -xpath "string(returnInfo/valueSearchResult)" -` + messagevpn_result=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \ + -q "" \ + -v "count(/rpc-reply/rpc/show/config-sync/database/local/tables/table)"` + messagevpn_total=`echo ${messagevpn_result} | xmllint -xpath "string(returnInfo/valueSearchResult)" -` - # Count message_vpns in-sync and compare with total - localmessagevpn_result=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \ - -q "" \ - -v "count(//table[sync-state='In-Sync'])"` - local_messagevpn_total_insync=`echo ${localmessagevpn_result} | xmllint -xpath "string(returnInfo/valueSearchResult)" -` - if [ "$messagevpn_total" -ne "$local_messagevpn_total_insync" ]; then - echo "`date` INFO: ${APP}-Config-sync state is not in-sync locally." - rm -f ${FINAL_ACTIVITY_LOGGED_TRACKING_FILE}; exit 1 - fi + # Count message_vpns in-sync and compare with total + localmessagevpn_result=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \ + -q "" \ + -v "count(//table[sync-state='In-Sync'])"` + local_messagevpn_total_insync=`echo ${localmessagevpn_result} | xmllint -xpath "string(returnInfo/valueSearchResult)" -` + if [ "$messagevpn_total" -ne "$local_messagevpn_total_insync" ]; then + echo "`date` INFO: ${APP}-Config-sync state is not in-sync locally." + rm -f ${FINAL_ACTIVITY_LOGGED_TRACKING_FILE}; exit 1 + fi - echo "`date` INFO: ${APP}-Checking Config-sync Setup. Remote config-sync state check starting..." - vpnremotehamate_result=$(get_router_remote_config_state "name") + echo "`date` INFO: ${APP}-Checking Config-sync Setup. Remote config-sync state check starting..." 
+ vpnremotehamate_result=$(get_router_remote_config_state "name") - remote_messagevpn_result=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \ - -q "" \ - -v "count(//table/source-router[name='$vpnremotehamate_result'])"` - remote_messagevpn_total=`echo ${remote_messagevpn_result} | xmllint -xpath "string(returnInfo/valueSearchResult)" -` + remote_messagevpn_result=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \ + -q "" \ + -v "count(//table/source-router[name='$vpnremotehamate_result'])"` + remote_messagevpn_total=`echo ${remote_messagevpn_result} | xmllint -xpath "string(returnInfo/valueSearchResult)" -` - #Count message_vpns in-sync, not stale and compare with total - remotemessagevpn_result=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \ - -q "" \ - -v "count(//table/source-router[name='$vpnremotehamate_result' and sync-state='In-Sync' and stale='No'])"` - remote_messagevpn_total_insync=`echo ${remotemessagevpn_result} | xmllint -xpath "string(returnInfo/valueSearchResult)" -` - if [ "$remote_messagevpn_total" -ne "$remote_messagevpn_total_insync" ]; then - echo "`date` INFO: ${APP}-Config-sync state is not in-sync for remote." - rm -f ${FINAL_ACTIVITY_LOGGED_TRACKING_FILE}; exit 1 - fi + #Count message_vpns in-sync, not stale and compare with total + remotemessagevpn_result=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \ + -q "" \ + -v "count(//table/source-router[name='$vpnremotehamate_result' and sync-state='In-Sync' and stale='No'])"` + remote_messagevpn_total_insync=`echo ${remotemessagevpn_result} | xmllint -xpath "string(returnInfo/valueSearchResult)" -` + if [ "$remote_messagevpn_total" -ne "$remote_messagevpn_total_insync" ]; then + echo "`date` INFO: ${APP}-Config-sync state is not in-sync for remote." + rm -f ${FINAL_ACTIVITY_LOGGED_TRACKING_FILE}; exit 1 fi fi # Pass readiness check if [ ! 
-f ${FINAL_ACTIVITY_LOGGED_TRACKING_FILE} ]; then - echo "`date` INFO: ${APP}-Redundancy is up and node is mate Active" + echo "`date` INFO: ${APP}-Redundancy is up and node is Mate Active" touch ${FINAL_ACTIVITY_LOGGED_TRACKING_FILE} echo "`date` INFO: ${APP}-Server status check complete for this broker node" exit 1