Collect ceph data for External mode #10594

Open
wants to merge 3 commits into base: master
27 changes: 27 additions & 0 deletions ocs_ci/ocs/utils.py
@@ -13,6 +13,7 @@

import yaml
from gevent import sleep
from pathlib import Path
from libcloud.common.exceptions import BaseHTTPError
from libcloud.common.types import LibcloudError
from libcloud.compute.providers import get_driver
@@ -970,6 +971,8 @@ def run_must_gather(log_dir_path, image, command=None, cluster_config=None):
            timeout=must_gather_timeout,
            cluster_config=cluster_config,
        )
        if config.DEPLOYMENT["external_mode"]:
            collect_ceph_external(path=log_dir_path)
    except CommandFailed as ex:
        log.error(
            f"Failed during must gather logs! Error: {ex}"
@@ -986,6 +989,30 @@
    return mg_output


def collect_ceph_external(path):
    """
    Collect Ceph command outputs via the CLI tool on an External mode cluster

    Args:
        path (str): The destination directory for saving the Ceph command outputs

    """
    try:
        kubeconfig_path = os.path.join(
            config.ENV_DATA["cluster_path"], config.RUN["kubeconfig_location"]
        )
        current_dir = Path(__file__).parent.parent.parent
        script_path = os.path.join(current_dir, "scripts", "bash", "mg_external.sh")
        # The script uses bash-specific features (arrays), so invoke it with
        # bash rather than sh
        run_cmd(
            f"bash {script_path} {os.path.join(path, 'ceph_external')} {kubeconfig_path}",
            timeout=100,
        )
    except Exception as ex:
        log.error(f"Failed to execute the Ceph commands script: {ex}")
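# A minimal usage sketch (hypothetical destination path; assumes a configured
# cluster and kubeconfig):
#   collect_ceph_external(path="/tmp/must_gather_logs")
# This runs scripts/bash/mg_external.sh and stores the command outputs under
# /tmp/must_gather_logs/ceph_external.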


def export_mg_pods_logs(log_dir_path):
    """
    Export must gather pods logs
151 changes: 151 additions & 0 deletions scripts/bash/mg_external.sh
@@ -0,0 +1,151 @@
#!/usr/bin/env bash

set -x

dbglog() {
    # Allow the input to be piped
    declare msg=${1:-$(</dev/stdin)}

    echo -e "${msg}" | tee -a "${BASE_COLLECTION_PATH}"/ceph/gather-debug.log
}
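# dbglog can be called directly or fed through a pipe, e.g.:
#   dbglog "collection started"
#   echo "collection started" | dbglog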


# Expect base collection path as an exported variable
# If it is not defined, use PWD instead
BASE_COLLECTION_PATH=${1:-"$(pwd)"}
# Create the ceph log directory up front so dbglog can append to it even
# before the command output directories are created below
mkdir -p "${BASE_COLLECTION_PATH}/ceph"
echo "${BASE_COLLECTION_PATH}"

KUBECONFIG=${2:-"${HOME}/.kube/config"}
ns=${3:-"openshift-storage"}
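# Example invocation (hypothetical paths; all three arguments are optional):
#   bash scripts/bash/mg_external.sh /tmp/logs/ceph_external ~/.kube/config openshift-storage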

TOOL_POD_NAME=$(oc --kubeconfig="${KUBECONFIG}" get pods --no-headers -n "${ns}" -l app='rook-ceph-tools' | awk '{print $1}')
if [ -z "$TOOL_POD_NAME" ]; then
    dbglog "No tool pod found"
    echo "No tool pod found"
    exit 2
fi


# gather_common_ceph_resources comes from the upstream must-gather collection
# scripts and is not defined in this file; call it only when available
if declare -f gather_common_ceph_resources >/dev/null; then
    gather_common_ceph_resources "${BASE_COLLECTION_PATH}"
fi
CEPH_GATHER_DBGLOG="${BASE_COLLECTION_PATH}"/gather-ceph-debug.log
CEPH_COLLECTION_PATH="${BASE_COLLECTION_PATH}/ceph"
COMMAND_OUTPUT_DIR=${CEPH_COLLECTION_PATH}/must_gather_commands
COMMAND_JSON_OUTPUT_DIR=${CEPH_COLLECTION_PATH}/must_gather_commands_json_output
COMMAND_ERR_OUTPUT_DIR=${CEPH_COLLECTION_PATH}/logs
mkdir -p "${COMMAND_OUTPUT_DIR}"
mkdir -p "${COMMAND_JSON_OUTPUT_DIR}"
mkdir -p "${COMMAND_ERR_OUTPUT_DIR}"


pids_ceph=()

# Ceph commands, first batch
ceph_commands1=()
ceph_commands1+=("ceph auth list")
ceph_commands1+=("ceph balancer pool ls")
ceph_commands1+=("ceph balancer status")
ceph_commands1+=("ceph config dump")
ceph_commands1+=("ceph config-key ls")
ceph_commands1+=("ceph crash ls")
ceph_commands1+=("ceph crash stat")
ceph_commands1+=("ceph device ls")
ceph_commands1+=("ceph df detail")
ceph_commands1+=("ceph fs dump")
ceph_commands1+=("ceph fs ls")
ceph_commands1+=("ceph fs status")
ceph_commands1+=("ceph health detail")
ceph_commands1+=("ceph healthcheck history ls")
ceph_commands1+=("ceph mds stat")
ceph_commands1+=("ceph mgr dump")
ceph_commands1+=("ceph mgr module ls")
ceph_commands1+=("ceph mgr services")
ceph_commands1+=("ceph mon stat")
ceph_commands1+=("ceph mon dump")
ceph_commands1+=("ceph osd df tree")
ceph_commands1+=("ceph osd tree")
ceph_commands1+=("ceph osd stat")
ceph_commands1+=("ceph osd dump")
ceph_commands1+=("ceph osd utilization")
ceph_commands1+=("ceph osd crush show-tunables")
ceph_commands1+=("ceph osd crush dump")
ceph_commands1+=("ceph osd crush weight-set ls")

# Ceph commands, second batch
ceph_commands2=()
ceph_commands2+=("ceph osd crush weight-set dump")
ceph_commands2+=("ceph osd crush rule dump")
ceph_commands2+=("ceph osd crush rule ls")
ceph_commands2+=("ceph osd crush class ls")
ceph_commands2+=("ceph osd perf")
ceph_commands2+=("ceph osd numa-status")
ceph_commands2+=("ceph osd getmaxosd")
ceph_commands2+=("ceph osd pool ls detail")
ceph_commands2+=("ceph osd lspools")
ceph_commands2+=("ceph osd df")
ceph_commands2+=("ceph osd blocked-by")
ceph_commands2+=("ceph osd blacklist ls")
ceph_commands2+=("ceph osd pool autoscale-status")
ceph_commands2+=("ceph pg dump")
ceph_commands2+=("ceph pg stat")
ceph_commands2+=("ceph progress")
ceph_commands2+=("ceph progress json")
ceph_commands2+=("ceph quorum_status")
ceph_commands2+=("ceph rbd task list")
ceph_commands2+=("ceph report")
ceph_commands2+=("ceph service dump")
ceph_commands2+=("ceph status")
ceph_commands2+=("ceph time-sync-status")
ceph_commands2+=("ceph versions")
ceph_commands2+=("ceph log last 10000 debug cluster")
ceph_commands2+=("ceph log last 10000 debug audit")
ceph_commands2+=("rados lspools")
ceph_commands2+=("rados ls --pool=ocs-storagecluster-cephblockpool")
ceph_commands2+=("rados ls --pool=ocs-storagecluster-cephfilesystem-metadata --namespace=csi")



# Collecting output of ceph osd config
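# "ceph osd tree | grep up" lists the OSDs that are up; column 4 of each row
# holds the OSD name (e.g. osd.0), which is then passed to "ceph config show"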
for i in $(timeout 120 oc --kubeconfig="${KUBECONFIG}" -n "${ns}" exec "${TOOL_POD_NAME}" -- bash -c "ceph osd tree --connect-timeout=15 | grep up " | awk '{print $4}'); do
    { timeout 120 oc --kubeconfig="${KUBECONFIG}" -n "${ns}" exec "${TOOL_POD_NAME}" -- bash -c "ceph config show $i" >>"${COMMAND_OUTPUT_DIR}/config_$i"; } >>"${COMMAND_ERR_OUTPUT_DIR}"/gather-config-"$i"-debug.log 2>&1 &
    pids_ceph+=($!)
done
# Wait for the background collectors, then reset the PID list for the next batch
if [ ${#pids_ceph[@]} -ne 0 ]; then
    echo "Waiting on subprocesses to finish execution."
    wait "${pids_ceph[@]}"
    pids_ceph=()
fi


# Collecting output of the first batch of ceph commands
for ((i = 0; i < ${#ceph_commands1[@]}; i++)); do
    dbglog "collecting command output for: ${ceph_commands1[$i]}"
    COMMAND_OUTPUT_FILE=${COMMAND_OUTPUT_DIR}/${ceph_commands1[$i]// /_}
    JSON_COMMAND_OUTPUT_FILE=${COMMAND_JSON_OUTPUT_DIR}/${ceph_commands1[$i]// /_}_--format_json-pretty
    # Each command runs twice in the background: once for plain output and
    # once with --format json-pretty
    { timeout 120 oc --kubeconfig="${KUBECONFIG}" -n "${ns}" exec "${TOOL_POD_NAME}" -- bash -c "${ceph_commands1[$i]} --connect-timeout=15" >>"${COMMAND_OUTPUT_FILE}"; } >>"${COMMAND_ERR_OUTPUT_DIR}"/gather-"${ceph_commands1[$i]}"-debug.log 2>&1 &
    pids_ceph+=($!)
    { timeout 120 oc --kubeconfig="${KUBECONFIG}" -n "${ns}" exec "${TOOL_POD_NAME}" -- bash -c "${ceph_commands1[$i]} --connect-timeout=15 --format json-pretty" >>"${JSON_COMMAND_OUTPUT_FILE}"; } >>"${COMMAND_ERR_OUTPUT_DIR}"/gather-"${ceph_commands1[$i]}"-json-debug.log 2>&1 &
    pids_ceph+=($!)
done
# Wait for the background collectors, then reset the PID list for the next batch
if [ ${#pids_ceph[@]} -ne 0 ]; then
    echo "Waiting on subprocesses to finish execution."
    wait "${pids_ceph[@]}"
    pids_ceph=()
fi


# Collecting output of the second batch of ceph commands
for ((i = 0; i < ${#ceph_commands2[@]}; i++)); do
    dbglog "collecting command output for: ${ceph_commands2[$i]}"
    COMMAND_OUTPUT_FILE=${COMMAND_OUTPUT_DIR}/${ceph_commands2[$i]// /_}
    JSON_COMMAND_OUTPUT_FILE=${COMMAND_JSON_OUTPUT_DIR}/${ceph_commands2[$i]// /_}_--format_json-pretty
    { timeout 120 oc --kubeconfig="${KUBECONFIG}" -n "${ns}" exec "${TOOL_POD_NAME}" -- bash -c "${ceph_commands2[$i]} --connect-timeout=15" >>"${COMMAND_OUTPUT_FILE}"; } >>"${COMMAND_ERR_OUTPUT_DIR}"/gather-"${ceph_commands2[$i]}"-debug.log 2>&1 &
    pids_ceph+=($!)
    { timeout 120 oc --kubeconfig="${KUBECONFIG}" -n "${ns}" exec "${TOOL_POD_NAME}" -- bash -c "${ceph_commands2[$i]} --connect-timeout=15 --format json-pretty" >>"${JSON_COMMAND_OUTPUT_FILE}"; } >>"${COMMAND_ERR_OUTPUT_DIR}"/gather-"${ceph_commands2[$i]}"-json-debug.log 2>&1 &
    pids_ceph+=($!)
done

# Wait for any remaining background collectors before the script exits
if [ ${#pids_ceph[@]} -ne 0 ]; then
    echo "Waiting on subprocesses to finish execution."
    wait "${pids_ceph[@]}"
fi
26 changes: 26 additions & 0 deletions tests/manage/z_cluster/test_bz.py
@@ -0,0 +1,26 @@
import logging

from ocs_ci.framework.testlib import ManageTest


logger = logging.getLogger(__name__)


class TestAcceptance(ManageTest):
    """
    Acceptance test for the Managed Service
    """

    def test_acceptance(self):
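        # Deliberately failing assertion: this looks like a temporary hook so
        # that the failure-path log collection (including the new
        # external-mode Ceph gathering) can be exercised end to end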
        assert 1 == 2
        # from ocs_ci.ocs.utils import _collect_ocs_logs
        # from ocs_ci.framework import config as ocsci_config
        # _collect_ocs_logs(
        #     ocsci_config,
        #     dir_name="/home/oviner/test/test2",
        #     ocp=False,
        #     ocs=True,
        #     mcg=False,
        # )