Collect ceph data for External mode #10594

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
151 changes: 151 additions & 0 deletions Docker_files/scripts/mg_external.sh
@@ -0,0 +1,151 @@
#!/usr/bin/env bash
Member comment:

Not sure Docker_files is the right folder. I think we should move the script or create another folder, since it isn't related to any Dockerfile.


set -x
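
# Usage: mg_external.sh [BASE_COLLECTION_PATH] [KUBECONFIG] [NAMESPACE]
#   BASE_COLLECTION_PATH - directory for the collected output (default: current directory)
#   KUBECONFIG           - path to the kubeconfig file (default: ~/.kube/config)
#   NAMESPACE            - namespace of the rook-ceph-tools pod (default: openshift-storage)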

dbglog() {
    # Allow the input to be piped
    declare msg=${1:-$(</dev/stdin)}

    echo -e "${msg}" | tee -a "${BASE_COLLECTION_PATH}"/gather-debug.log
}


# Expect the base collection path as the first argument
# If it is not provided, use the current working directory instead
BASE_COLLECTION_PATH=${1:-"$(pwd)"}
echo "${BASE_COLLECTION_PATH}"

# Tilde does not expand inside quotes, so use ${HOME} for the default
KUBECONFIG=${2:-"${HOME}/.kube/config"}
ns=${3:-"openshift-storage"}

TOOL_POD_NAME=$(oc --kubeconfig="${KUBECONFIG}" get pods --no-headers -n "${ns}" -l app='rook-ceph-tools' | awk '{print $1}')
if [ -z "$TOOL_POD_NAME" ]; then
    # dbglog already echoes to stdout via tee, so no separate echo is needed
    dbglog "No tool pod found"
    exit 1
fi


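# NOTE: gather_common_ceph_resources is not defined in this script; it is
# assumed to be provided by the must-gather helpers available in the
# environment that runs this script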
gather_common_ceph_resources "${BASE_COLLECTION_PATH}"
CEPH_GATHER_DBGLOG="${BASE_COLLECTION_PATH}"/gather-ceph-debug.log
CEPH_COLLECTION_PATH="${BASE_COLLECTION_PATH}/ceph"
COMMAND_OUTPUT_DIR=${CEPH_COLLECTION_PATH}/must_gather_commands
COMMAND_JSON_OUTPUT_DIR=${CEPH_COLLECTION_PATH}/must_gather_commands_json_output
COMMAND_ERR_OUTPUT_DIR=${CEPH_COLLECTION_PATH}/logs
mkdir -p "${COMMAND_OUTPUT_DIR}"
mkdir -p "${COMMAND_JSON_OUTPUT_DIR}"
mkdir -p "${COMMAND_ERR_OUTPUT_DIR}"


pids_ceph=()

# Ceph commands, batch 1
ceph_commands1=()
ceph_commands1+=("ceph auth list")
ceph_commands1+=("ceph balancer pool ls")
ceph_commands1+=("ceph balancer status")
ceph_commands1+=("ceph config dump")
ceph_commands1+=("ceph config-key ls")
ceph_commands1+=("ceph crash ls")
ceph_commands1+=("ceph crash stat")
ceph_commands1+=("ceph device ls")
ceph_commands1+=("ceph df detail")
ceph_commands1+=("ceph fs dump")
ceph_commands1+=("ceph fs ls")
ceph_commands1+=("ceph fs status")
ceph_commands1+=("ceph health detail")
ceph_commands1+=("ceph healthcheck history ls")
ceph_commands1+=("ceph mds stat")
ceph_commands1+=("ceph mgr dump")
ceph_commands1+=("ceph mgr module ls")
ceph_commands1+=("ceph mgr services")
ceph_commands1+=("ceph mon stat")
ceph_commands1+=("ceph mon dump")
ceph_commands1+=("ceph osd df tree")
ceph_commands1+=("ceph osd tree")
ceph_commands1+=("ceph osd stat")
ceph_commands1+=("ceph osd dump")
ceph_commands1+=("ceph osd utilization")
ceph_commands1+=("ceph osd crush show-tunables")
ceph_commands1+=("ceph osd crush dump")
ceph_commands1+=("ceph osd crush weight-set ls")

# Ceph commands, batch 2
ceph_commands2=()
ceph_commands2+=("ceph osd crush weight-set dump")
ceph_commands2+=("ceph osd crush rule dump")
ceph_commands2+=("ceph osd crush rule ls")
ceph_commands2+=("ceph osd crush class ls")
ceph_commands2+=("ceph osd perf")
ceph_commands2+=("ceph osd numa-status")
ceph_commands2+=("ceph osd getmaxosd")
ceph_commands2+=("ceph osd pool ls detail")
ceph_commands2+=("ceph osd lspools")
ceph_commands2+=("ceph osd df")
ceph_commands2+=("ceph osd blocked-by")
ceph_commands2+=("ceph osd blacklist ls")
ceph_commands2+=("ceph osd pool autoscale-status")
ceph_commands2+=("ceph pg dump")
ceph_commands2+=("ceph pg stat")
ceph_commands2+=("ceph progress")
ceph_commands2+=("ceph progress json")
ceph_commands2+=("ceph quorum_status")
ceph_commands2+=("ceph rbd task list")
ceph_commands2+=("ceph report")
ceph_commands2+=("ceph service dump")
ceph_commands2+=("ceph status")
ceph_commands2+=("ceph time-sync-status")
ceph_commands2+=("ceph versions")
ceph_commands2+=("ceph log last 10000 debug cluster")
ceph_commands2+=("ceph log last 10000 debug audit")
ceph_commands2+=("rados lspools")
ceph_commands2+=("rados ls --pool=ocs-storagecluster-cephblockpool")
ceph_commands2+=("rados ls --pool=ocs-storagecluster-cephfilesystem-metadata --namespace=csi")



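# Each command below runs in a background subshell: stdout is appended to a
# per-command output file, stderr to a per-command debug log under logs/, and
# the subshell PID is recorded in pids_ceph so each batch can be awaited
# before the next one starts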
# Collecting output of ceph osd config for each OSD reported "up"
for i in $(timeout 120 oc --kubeconfig="${KUBECONFIG}" -n "${ns}" exec "${TOOL_POD_NAME}" -- bash -c "ceph osd tree --connect-timeout=15 | grep up " | awk '{print $4}'); do
    { timeout 120 oc --kubeconfig="${KUBECONFIG}" -n "${ns}" exec "${TOOL_POD_NAME}" -- bash -c "ceph config show $i" >>"${COMMAND_OUTPUT_DIR}/config_$i"; } >>"${COMMAND_ERR_OUTPUT_DIR}"/gather-config-"$i"-debug.log 2>&1 &
    pids_ceph+=($!)
done
# Check if the PID array has any values; if so, wait for them to finish
if [ ${#pids_ceph[@]} -ne 0 ]; then
    echo "Waiting on subprocesses to finish execution."
    wait "${pids_ceph[@]}"
    # Reset the array so already-reaped PIDs are not waited on again
    pids_ceph=()
fi


# Collecting output of ceph commands (batch 1), in plain and JSON formats
for ((i = 0; i < ${#ceph_commands1[@]}; i++)); do
    dbglog "collecting command output for: ${ceph_commands1[$i]}"
    COMMAND_OUTPUT_FILE=${COMMAND_OUTPUT_DIR}/${ceph_commands1[$i]// /_}
    JSON_COMMAND_OUTPUT_FILE=${COMMAND_JSON_OUTPUT_DIR}/${ceph_commands1[$i]// /_}_--format_json-pretty
    { timeout 120 oc --kubeconfig="${KUBECONFIG}" -n "${ns}" exec "${TOOL_POD_NAME}" -- bash -c "${ceph_commands1[$i]} --connect-timeout=15" >>"${COMMAND_OUTPUT_FILE}"; } >>"${COMMAND_ERR_OUTPUT_DIR}"/gather-"${ceph_commands1[$i]}"-debug.log 2>&1 &
    pids_ceph+=($!)
    { timeout 120 oc --kubeconfig="${KUBECONFIG}" -n "${ns}" exec "${TOOL_POD_NAME}" -- bash -c "${ceph_commands1[$i]} --connect-timeout=15 --format json-pretty" >>"${JSON_COMMAND_OUTPUT_FILE}"; } >>"${COMMAND_ERR_OUTPUT_DIR}"/gather-"${ceph_commands1[$i]}"-json-debug.log 2>&1 &
    pids_ceph+=($!)
done
# Check if the PID array has any values; if so, wait for them to finish
if [ ${#pids_ceph[@]} -ne 0 ]; then
    echo "Waiting on subprocesses to finish execution."
    wait "${pids_ceph[@]}"
    # Reset the array so already-reaped PIDs are not waited on again
    pids_ceph=()
fi


# Collecting output of ceph commands (batch 2), in plain and JSON formats
for ((i = 0; i < ${#ceph_commands2[@]}; i++)); do
    dbglog "collecting command output for: ${ceph_commands2[$i]}"
    COMMAND_OUTPUT_FILE=${COMMAND_OUTPUT_DIR}/${ceph_commands2[$i]// /_}
    JSON_COMMAND_OUTPUT_FILE=${COMMAND_JSON_OUTPUT_DIR}/${ceph_commands2[$i]// /_}_--format_json-pretty
    { timeout 120 oc --kubeconfig="${KUBECONFIG}" -n "${ns}" exec "${TOOL_POD_NAME}" -- bash -c "${ceph_commands2[$i]} --connect-timeout=15" >>"${COMMAND_OUTPUT_FILE}"; } >>"${COMMAND_ERR_OUTPUT_DIR}"/gather-"${ceph_commands2[$i]}"-debug.log 2>&1 &
    pids_ceph+=($!)
    { timeout 120 oc --kubeconfig="${KUBECONFIG}" -n "${ns}" exec "${TOOL_POD_NAME}" -- bash -c "${ceph_commands2[$i]} --connect-timeout=15 --format json-pretty" >>"${JSON_COMMAND_OUTPUT_FILE}"; } >>"${COMMAND_ERR_OUTPUT_DIR}"/gather-"${ceph_commands2[$i]}"-json-debug.log 2>&1 &
    pids_ceph+=($!)
done

# Check if the PID array has any values; if so, wait for them to finish
if [ ${#pids_ceph[@]} -ne 0 ]; then
    echo "Waiting on subprocesses to finish execution."
    wait "${pids_ceph[@]}"
fi
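
For reference, a minimal sketch of invoking the script by hand (illustrative paths; assumes a reachable cluster with the rook-ceph-tools pod deployed):

    bash Docker_files/scripts/mg_external.sh /tmp/ceph_external ~/.kube/config openshift-storage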
29 changes: 29 additions & 0 deletions ocs_ci/ocs/utils.py
@@ -13,6 +13,7 @@

import yaml
from gevent import sleep
from pathlib import Path
from libcloud.common.exceptions import BaseHTTPError
from libcloud.common.types import LibcloudError
from libcloud.compute.providers import get_driver
@@ -970,6 +971,8 @@ def run_must_gather(log_dir_path, image, command=None, cluster_config=None):
            timeout=must_gather_timeout,
            cluster_config=cluster_config,
        )
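        # External mode: the Ceph cluster runs outside OpenShift, so its
        # command output is gathered separately via the external script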
        if config.DEPLOYMENT["external_mode"]:
            collect_ceph_external(path=log_dir_path)
    except CommandFailed as ex:
        log.error(
            f"Failed during must gather logs! Error: {ex}"
@@ -986,6 +989,32 @@
    return mg_output


def collect_ceph_external(path):
    """
    Collect Ceph command output via the CLI tool on an External mode cluster

    Args:
        path (str): Destination directory for the collected Ceph command output

    """
    try:
        kubeconfig_path = os.path.join(
            config.ENV_DATA["cluster_path"], config.RUN["kubeconfig_location"]
        )
        current_dir = Path(__file__).parent.parent.parent
        script_path = os.path.join(
            current_dir, "Docker_files", "scripts", "mg_external.sh"
        )
        # The script relies on bash arrays, so run it with bash rather than sh;
        # the timeout allows for the script's sequential 120s command batches
        run_cmd(
            f"bash {script_path} {os.path.join(path, 'ceph_external')} {kubeconfig_path}",
            timeout=600,
        )
    except Exception as ex:
        log.error(
            f"Failed to execute the Ceph commands script. Error: {ex}"
        )


def export_mg_pods_logs(log_dir_path):
    """
    Export must gather pods logs