Skip to content

Commit

Permalink
Add debuggability for reboot function
Browse files Browse the repository at this point in the history
1. Add a function to collect the console log from the start of the reboot until the DUT is up
2. When the DUT does not come up, check whether it is pingable and collect the mgmt interface config

Change-Id: I700340d3fbfc00d61bc6e508ac5be4e4dc0a25e6
  • Loading branch information
JibinBao committed Sep 9, 2024
1 parent 7b5156e commit 2653915
Show file tree
Hide file tree
Showing 3 changed files with 163 additions and 101 deletions.
104 changes: 104 additions & 0 deletions tests/common/helpers/dut_utils.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,20 @@
import logging
import allure
import jinja2
import glob
import re
import os
import yaml
from tests.common.helpers.assertions import pytest_assert
from tests.common.utilities import get_host_visible_vars
from tests.common.utilities import wait_until
from collections import defaultdict
from tests.common.connections.console_host import ConsoleHost
from tests.common.utilities import get_dut_current_passwd

CONTAINER_CHECK_INTERVAL_SECS = 1
CONTAINER_RESTART_THRESHOLD_SECS = 180
BASI_PATH = os.path.dirname(os.path.abspath(__file__))

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -335,3 +343,99 @@ def get_sai_sdk_dump_file(duthost, dump_file_name):

duthost.fetch(src=compressed_dump_file, dest="/tmp/", flat=True)
allure.attach.file(compressed_dump_file, dump_file_name, extension=".tar.gz")


def create_duthost_console(duthost, localhost, conn_graph_facts, creds):    # noqa F811
    """Build a ConsoleHost for the console port wired to the given DUT.

    Args:
        duthost: DUT host object; only its ``hostname`` is read here.
        localhost: Host object whose variable manager holds the DUT hostvars
            (used to look up ``ansible_altpassword``).
        conn_graph_facts: Mapping with ``device_console_info`` and
            ``device_console_link`` entries keyed by DUT hostname.
        creds: Mapping providing ``sonicadmin_user``, ``sonicadmin_password``
            and ``console_password`` (the latter keyed by console type).

    Returns:
        The ConsoleHost instance constructed from the values above.
    """
    hostname = duthost.hostname

    # The management IP may be written as "address/prefix"; keep the address only.
    mgmt_ip = conn_graph_facts['device_console_info'][hostname]['ManagementIp']
    server_ip = mgmt_ip.split("/")[0] if "/" in mgmt_ip else mgmt_ip

    port_info = conn_graph_facts['device_console_link'][hostname]['ConsolePort']
    server_port = port_info['peerport']
    raw_type = port_info['type']
    proxy_user = port_info['proxy']

    # Console passwords in creds are keyed by the "console_"-prefixed type.
    prefixed_type = "console_" + raw_type

    # console password and sonic_password are lists, which may contain more than one password
    dut_hostvars = localhost.host.options['variable_manager']._hostvars[hostname]
    alt_password = dut_hostvars.get("ansible_altpassword")

    return ConsoleHost(
        console_type=prefixed_type,
        console_host=server_ip,
        console_port=server_port,
        sonic_username=creds['sonicadmin_user'],
        sonic_password=[creds['sonicadmin_password'], alt_password],
        console_username=proxy_user,
        console_password=creds['console_password'][prefixed_type],
    )


def creds_on_dut(duthost):
    """Read credential information according to the DUT inventory.

    YAML variable files are loaded from ``ansible/group_vars/all``,
    ``ansible/vars`` and ``ansible/group_vars/<group>`` for every inventory
    group of the DUT plus ``fanout``. Files whose path matches one of the
    exclude patterns are skipped. The files are merged with ``dict.update``,
    so a key defined in a later file replaces the value from an earlier one.

    Args:
        duthost: DUT host object. Its inventory manager, variable manager,
            ``hostname``, ``mgmt_ip`` and ``mgmt_ipv6`` are used.

    Returns:
        dict: the merged variables, with these adjustments:
            * the entries listed in ``cred_vars`` are rendered as Jinja2
              templates against the DUT hostvars;
            * ``ansible_altpasswords`` is a one-element list holding the
              ``ansible_altpassword`` hostvar;
            * ``sonicadmin_password`` is whatever ``get_dut_current_passwd``
              returns for the candidate password list;
            * ``console_user`` / ``console_password`` are dicts built from the
              ``console_login`` hostvar (empty when it is absent).

    Raises:
        KeyError: if ``ansible_altpassword`` is missing from the hostvars, or
            ``sonicadmin_user`` / ``sonicadmin_password`` were not loaded.
    """
    inventory_host = duthost.host.options['inventory_manager'].get_host(duthost.hostname)
    # Build a new list instead of appending in place, so the list handed back
    # by the inventory is never modified.
    groups = list(inventory_host.get_vars()['group_names']) + ["fanout"]
    logger.info("dut {} belongs to groups {}".format(duthost.hostname, groups))

    exclude_regex_patterns = [
        r'topo_.*\.yml',
        r'breakout_speed\.yml',
        r'lag_fanout_ports_test_vars\.yml',
        r'qos\.yml',
        r'sku-sensors-data\.yml',
        r'mux_simulator_http_port_map\.yml'
    ]
    # Compile the alternation once rather than re-joining it for every file.
    exclude_regex = re.compile('|'.join(exclude_regex_patterns))

    # Resolve the ansible folder relative to this module, so the result does
    # not depend on the current working directory.
    ansible_folder_path = os.path.join(BASI_PATH, "../../../ansible/")
    files = glob.glob(os.path.join(ansible_folder_path, "group_vars/all/*.yml"))
    files += glob.glob(os.path.join(ansible_folder_path, "vars/*.yml"))
    for group in groups:
        files += glob.glob(os.path.join(ansible_folder_path, f"group_vars/{group}/*.yml"))
    filtered_files = [f for f in files if not exclude_regex.search(f)]

    creds = {}
    for var_file in filtered_files:
        with open(var_file) as stream:
            content = yaml.safe_load(stream)
        if content is None:
            # Use the module logger (not the root logger) like the rest of this file.
            logger.info("skip empty var file {}".format(var_file))
            continue
        creds.update(content)

    cred_vars = [
        "sonicadmin_user",
        "sonicadmin_password",
        "docker_registry_host",
        "docker_registry_username",
        "docker_registry_password",
        "public_docker_registry_host"
    ]
    hostvars = duthost.host.options['variable_manager']._hostvars[duthost.hostname]
    for cred_var in cred_vars:
        if cred_var in creds:
            creds[cred_var] = jinja2.Template(creds[cred_var]).render(**hostvars)

    # load creds for console
    console_login_creds = hostvars["console_login"] if "console_login" in hostvars else {}

    # NOTE(review): any "ansible_altpasswords" value loaded from the var files
    # is discarded here and replaced by the single hostvar password. This is
    # the original behavior (the list was reset to [] and then unconditionally
    # refilled); confirm whether loaded values were meant to be kept.
    creds["ansible_altpasswords"] = [hostvars["ansible_altpassword"]]

    passwords = creds["ansible_altpasswords"] + [creds["sonicadmin_password"]]
    creds['sonicadmin_password'] = get_dut_current_passwd(
        duthost.mgmt_ip,
        duthost.mgmt_ipv6,
        creds['sonicadmin_user'],
        passwords
    )

    creds["console_user"] = {}
    creds["console_password"] = {}
    for console_type, login in console_login_creds.items():
        creds["console_user"][console_type] = login["user"]
        creds["console_password"][console_type] = login["passwd"]

    return creds
60 changes: 56 additions & 4 deletions tests/common/reboot.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@
from .platform.interface_utils import check_interface_status_of_up_ports
from .platform.processes_utils import wait_critical_processes
from .utilities import wait_until, get_plt_reboot_ctrl
from tests.common.helpers.dut_utils import ignore_t2_syslog_msgs
from tests.common.helpers.dut_utils import ignore_t2_syslog_msgs, create_duthost_console, creds_on_dut
from tests.common.fixtures.conn_graph_facts import get_graph_facts


logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -188,7 +190,8 @@ def wait_for_startup(duthost, localhost, delay, timeout):
timeout=timeout,
module_ignore_errors=True)
if res.is_failed or ('msg' in res and 'Timeout' in res['msg']):
raise Exception('DUT {} did not startup'.format(hostname))
collect_mgmt_config_by_console(duthost, localhost)
raise Exception(f'DUT {hostname} did not startup. res:{res}')

logger.info('ssh has started up on {}'.format(hostname))

Expand Down Expand Up @@ -262,7 +265,10 @@ def reboot(duthost, localhost, reboot_type='cold', delay=10,
# Create a temporary file in tmpfs before reboot
logger.info('DUT {} create a file /dev/shm/test_reboot before rebooting'.format(hostname))
duthost.command('sudo touch /dev/shm/test_reboot')

wait_conlsole_connection = 5
console_thread_res = pool.apply_async(
collect_console_log, args=(duthost, localhost, timeout + wait_conlsole_connection))
time.sleep(wait_conlsole_connection)
reboot_res, dut_datetime = perform_reboot(duthost, pool, reboot_command, reboot_helper, reboot_kwargs, reboot_type)

wait_for_shutdown(duthost, localhost, delay, timeout, reboot_res)
Expand All @@ -273,7 +279,12 @@ def reboot(duthost, localhost, reboot_type='cold', delay=10,
# if wait_for_ssh flag is False, do not wait for dut to boot up
if not wait_for_ssh:
return
wait_for_startup(duthost, localhost, delay, timeout)
try:
wait_for_startup(duthost, localhost, delay, timeout)
except Exception as err:
logger.error('collecting console log thread result: {} on {}'.format(console_thread_res.get(), hostname))
pool.terminate()
raise Exception(f"dut not start:{err}")

logger.info('waiting for switch {} to initialize'.format(hostname))

Expand Down Expand Up @@ -493,3 +504,44 @@ def check_reboot_cause_history(dut, reboot_type_history_queue):
logger.error("The number of expected reboot-cause:{} is more than that of actual reboot-cuase:{}".format(
reboot_type_history_len, len(reboot_type_history_queue)))
return False


def try_create_dut_console(duthost, localhost, conn_graph_facts, creds):
    """Create a console host for the DUT without letting a failure propagate.

    Args:
        duthost: DUT host object.
        localhost: Local host object.
        conn_graph_facts: Connection graph facts passed through unchanged.
        creds: Credentials mapping passed through unchanged.

    Returns:
        Whatever ``create_duthost_console`` returns, or ``None`` when it
        raises; the failure is logged as a warning in that case.
    """
    try:
        console = create_duthost_console(duthost, localhost, conn_graph_facts, creds)
    except Exception as err:
        logger.warning(f"Fail to create dut console. Please check console config or if console works ro not. {err}")
        return None
    else:
        logger.info("creating dut console succeeds")
        return console


def collect_console_log(duthost, localhost, timeout):
    """Hold a console session to the DUT open for ``timeout`` seconds.

    Credentials and connection graph facts are gathered, a console session is
    opened through ``try_create_dut_console``, and the function then sleeps for
    ``timeout`` seconds before disconnecting.
    NOTE(review): presumably the console session records the DUT output while
    it is connected - confirm against ConsoleHost.

    Args:
        duthost: DUT host object.
        localhost: Local host object.
        timeout: Number of seconds to keep the console session open.

    Returns:
        None. When no console session could be created, a warning is logged
        and nothing else happens.
    """
    logger.info("start: collect console log")
    creds = creds_on_dut(duthost)
    conn_graph_facts = get_graph_facts(duthost, localhost, [duthost.hostname])
    dut_console = try_create_dut_console(duthost, localhost, conn_graph_facts, creds)
    if not dut_console:
        # The original text said "we can get log", the opposite of what this path means.
        logger.warning("dut console is not ready, we can't get log by console")
        return

    try:
        logger.info(f"sleep {timeout} to collect console log....")
        time.sleep(timeout)
    finally:
        # Always release the console line, even if the wait is interrupted.
        dut_console.disconnect()
    logger.info('end: collect console log')


def collect_mgmt_config_by_console(duthost, localhost):
    """Ping the DUT and query its management interface over the console.

    First pings ``duthost.mgmt_ip`` from localhost with errors ignored. Then,
    if a console session can be created, sends ``ip a s eth0`` and
    ``show ip int`` over it and disconnects.

    Args:
        duthost: DUT host object; ``mgmt_ip`` and ``hostname`` are read.
        localhost: Local host object used to run the ping and to resolve the
            console connection.

    Returns:
        None. When no console session could be created, a warning is logged
        instead of sending the commands.
    """
    logger.info("check if dut is pingable")
    localhost.shell(f"ping -c 5 {duthost.mgmt_ip}", module_ignore_errors=True)

    logger.info("Start: collect mgmt config by console")
    creds = creds_on_dut(duthost)
    conn_graph_facts = get_graph_facts(duthost, localhost, [duthost.hostname])
    dut_console = try_create_dut_console(duthost, localhost, conn_graph_facts, creds)
    if not dut_console:
        # The original text said "we can get mgmt config", the opposite of what this path means.
        logger.warning("dut console is not ready, we can't get mgmt config by console")
        return

    try:
        dut_console.send_command("ip a s eth0")
        dut_console.send_command("show ip int")
    finally:
        # Always release the console line, even if a command fails.
        dut_console.disconnect()
    logger.info('End: collect mgmt config by console ...')
100 changes: 3 additions & 97 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,13 @@
import concurrent.futures
import os
import glob
import json
import logging
import getpass
import random
import re
from concurrent.futures import as_completed

import pytest
import yaml
import jinja2
import copy

from datetime import datetime
Expand Down Expand Up @@ -47,11 +44,9 @@
from tests.common.utilities import get_test_server_host
from tests.common.utilities import str2bool
from tests.common.utilities import safe_filename
from tests.common.utilities import get_dut_current_passwd
from tests.common.helpers.dut_utils import is_supervisor_node, is_frontend_node
from tests.common.helpers.dut_utils import is_supervisor_node, is_frontend_node, create_duthost_console, creds_on_dut
from tests.common.cache import FactsCache
from tests.common.config_reload import config_reload
from tests.common.connections.console_host import ConsoleHost
from tests.common.helpers.assertions import pytest_assert as pt_assert
from tests.common.helpers.sonic_db import AsicDbCli
from tests.common.helpers.inventory_utils import trim_inventory
Expand Down Expand Up @@ -778,77 +773,6 @@ def pdu():
return pdu


def creds_on_dut(duthost):
""" read credential information according to the dut inventory """
groups = duthost.host.options['inventory_manager'].get_host(duthost.hostname).get_vars()['group_names']
groups.append("fanout")
logger.info("dut {} belongs to groups {}".format(duthost.hostname, groups))
exclude_regex_patterns = [
r'topo_.*\.yml',
r'breakout_speed\.yml',
r'lag_fanout_ports_test_vars\.yml',
r'qos\.yml',
r'sku-sensors-data\.yml',
r'mux_simulator_http_port_map\.yml'
]
files = glob.glob("../ansible/group_vars/all/*.yml")
files += glob.glob("../ansible/vars/*.yml")
for group in groups:
files += glob.glob("../ansible/group_vars/{}/*.yml".format(group))
filtered_files = [
f for f in files if not re.search('|'.join(exclude_regex_patterns), f)
]

creds = {}
for f in filtered_files:
with open(f) as stream:
v = yaml.safe_load(stream)
if v is not None:
creds.update(v)
else:
logging.info("skip empty var file {}".format(f))

cred_vars = [
"sonicadmin_user",
"sonicadmin_password",
"docker_registry_host",
"docker_registry_username",
"docker_registry_password",
"public_docker_registry_host"
]
hostvars = duthost.host.options['variable_manager']._hostvars[duthost.hostname]
for cred_var in cred_vars:
if cred_var in creds:
creds[cred_var] = jinja2.Template(creds[cred_var]).render(**hostvars)
# load creds for console
if "console_login" not in list(hostvars.keys()):
console_login_creds = {}
else:
console_login_creds = hostvars["console_login"]
creds["console_user"] = {}
creds["console_password"] = {}

creds["ansible_altpasswords"] = []

# If ansible_altpasswords is empty, add ansible_altpassword to it
if len(creds["ansible_altpasswords"]) == 0:
creds["ansible_altpasswords"].append(hostvars["ansible_altpassword"])

passwords = creds["ansible_altpasswords"] + [creds["sonicadmin_password"]]
creds['sonicadmin_password'] = get_dut_current_passwd(
duthost.mgmt_ip,
duthost.mgmt_ipv6,
creds['sonicadmin_user'],
passwords
)

for k, v in list(console_login_creds.items()):
creds["console_user"][k] = v["user"]
creds["console_password"][k] = v["passwd"]

return creds


@pytest.fixture(scope="session")
def creds(duthost):
    """Session-scoped fixture returning the result of ``creds_on_dut`` for the DUT."""
    dut_creds = creds_on_dut(duthost)
    return dut_creds
Expand Down Expand Up @@ -1712,26 +1636,8 @@ def enum_rand_one_frontend_asic_index(request):
@pytest.fixture(scope="module")
def duthost_console(duthosts, enum_supervisor_dut_hostname, localhost, conn_graph_facts, creds): # noqa F811
duthost = duthosts[enum_supervisor_dut_hostname]
dut_hostname = duthost.hostname
console_host = conn_graph_facts['device_console_info'][dut_hostname]['ManagementIp']
if "/" in console_host:
console_host = console_host.split("/")[0]
console_port = conn_graph_facts['device_console_link'][dut_hostname]['ConsolePort']['peerport']
console_type = conn_graph_facts['device_console_link'][dut_hostname]['ConsolePort']['type']
console_username = conn_graph_facts['device_console_link'][dut_hostname]['ConsolePort']['proxy']

console_type = "console_" + console_type

# console password and sonic_password are lists, which may contain more than one password
sonicadmin_alt_password = localhost.host.options['variable_manager']._hostvars[dut_hostname].get(
"ansible_altpassword")
host = ConsoleHost(console_type=console_type,
console_host=console_host,
console_port=console_port,
sonic_username=creds['sonicadmin_user'],
sonic_password=[creds['sonicadmin_password'], sonicadmin_alt_password],
console_username=console_username,
console_password=creds['console_password'][console_type])
host = create_duthost_console(duthost, localhost, conn_graph_facts, creds)

yield host
host.disconnect()

Expand Down

0 comments on commit 2653915

Please sign in to comment.