From 2653915084523392b469c01beeb2531b5294ea0c Mon Sep 17 00:00:00 2001
From: jbao
Date: Thu, 22 Aug 2024 14:03:41 +0300
Subject: [PATCH] Add debuggability for reboot function

1. Add function to collect console log from starting reboot to dut up
2. When dut is not up, check if dut is pingable and collect the mgmt
   interface config

Change-Id: I700340d3fbfc00d61bc6e508ac5be4e4dc0a25e6
---
 tests/common/helpers/dut_utils.py | 104 ++++++++++++++++++++++++++++++
 tests/common/reboot.py            |  60 +++++++++++++++--
 tests/conftest.py                 | 100 +----------------------------
 3 files changed, 163 insertions(+), 101 deletions(-)

diff --git a/tests/common/helpers/dut_utils.py b/tests/common/helpers/dut_utils.py
index 68b3ea3b50..435529034c 100644
--- a/tests/common/helpers/dut_utils.py
+++ b/tests/common/helpers/dut_utils.py
@@ -1,12 +1,20 @@
 import logging
 import allure
+import jinja2
+import glob
+import re
+import os
+import yaml
 from tests.common.helpers.assertions import pytest_assert
 from tests.common.utilities import get_host_visible_vars
 from tests.common.utilities import wait_until
 from collections import defaultdict
+from tests.common.connections.console_host import ConsoleHost
+from tests.common.utilities import get_dut_current_passwd
 
 CONTAINER_CHECK_INTERVAL_SECS = 1
 CONTAINER_RESTART_THRESHOLD_SECS = 180
+BASE_PATH = os.path.dirname(os.path.abspath(__file__))
 
 logger = logging.getLogger(__name__)
 
@@ -335,3 +343,99 @@ def get_sai_sdk_dump_file(duthost, dump_file_name):
 
     duthost.fetch(src=compressed_dump_file, dest="/tmp/", flat=True)
     allure.attach.file(compressed_dump_file, dump_file_name, extension=".tar.gz")
+
+
+def create_duthost_console(duthost, localhost, conn_graph_facts, creds):  # noqa F811
+    dut_hostname = duthost.hostname
+    console_host = conn_graph_facts['device_console_info'][dut_hostname]['ManagementIp']
+    if "/" in console_host:
+        console_host = console_host.split("/")[0]
+    console_port = conn_graph_facts['device_console_link'][dut_hostname]['ConsolePort']['peerport']
+    console_type = conn_graph_facts['device_console_link'][dut_hostname]['ConsolePort']['type']
+    console_username = conn_graph_facts['device_console_link'][dut_hostname]['ConsolePort']['proxy']
+
+    console_type = "console_" + console_type
+
+    # console password and sonic_password are lists, which may contain more than one password
+    sonicadmin_alt_password = localhost.host.options['variable_manager']._hostvars[dut_hostname].get(
+        "ansible_altpassword")
+    host = ConsoleHost(console_type=console_type,
+                       console_host=console_host,
+                       console_port=console_port,
+                       sonic_username=creds['sonicadmin_user'],
+                       sonic_password=[creds['sonicadmin_password'], sonicadmin_alt_password],
+                       console_username=console_username,
+                       console_password=creds['console_password'][console_type])
+    return host
+
+
+def creds_on_dut(duthost):
+    """ read credential information according to the dut inventory """
+    groups = duthost.host.options['inventory_manager'].get_host(duthost.hostname).get_vars()['group_names']
+    groups.append("fanout")
+    logger.info("dut {} belongs to groups {}".format(duthost.hostname, groups))
+    exclude_regex_patterns = [
+        r'topo_.*\.yml',
+        r'breakout_speed\.yml',
+        r'lag_fanout_ports_test_vars\.yml',
+        r'qos\.yml',
+        r'sku-sensors-data\.yml',
+        r'mux_simulator_http_port_map\.yml'
+    ]
+    ansible_folder_path = os.path.join(BASE_PATH, "../../../ansible/")
+    files = glob.glob(os.path.join(ansible_folder_path, "group_vars/all/*.yml"))
+    files += glob.glob(os.path.join(ansible_folder_path, "vars/*.yml"))
+    for group in groups:
+        files += glob.glob(os.path.join(ansible_folder_path, f"group_vars/{group}/*.yml"))
+    filtered_files = [
+        f for f in files if not re.search('|'.join(exclude_regex_patterns), f)
+    ]
+
+    creds = {}
+    for f in filtered_files:
+        with open(f) as stream:
+            v = yaml.safe_load(stream)
+            if v is not None:
+                creds.update(v)
+            else:
+                logger.info("skip empty var file {}".format(f))
+
+    cred_vars = [
+        "sonicadmin_user",
+        "sonicadmin_password",
+        "docker_registry_host",
+        "docker_registry_username",
+        "docker_registry_password",
+        "public_docker_registry_host"
+    ]
+    hostvars = duthost.host.options['variable_manager']._hostvars[duthost.hostname]
+    for cred_var in cred_vars:
+        if cred_var in creds:
+            creds[cred_var] = jinja2.Template(creds[cred_var]).render(**hostvars)
+    # load creds for console
+    if "console_login" not in list(hostvars.keys()):
+        console_login_creds = {}
+    else:
+        console_login_creds = hostvars["console_login"]
+    creds["console_user"] = {}
+    creds["console_password"] = {}
+
+    creds["ansible_altpasswords"] = []
+
+    # If ansible_altpasswords is empty, add ansible_altpassword to it
+    if len(creds["ansible_altpasswords"]) == 0:
+        creds["ansible_altpasswords"].append(hostvars["ansible_altpassword"])
+
+    passwords = creds["ansible_altpasswords"] + [creds["sonicadmin_password"]]
+    creds['sonicadmin_password'] = get_dut_current_passwd(
+        duthost.mgmt_ip,
+        duthost.mgmt_ipv6,
+        creds['sonicadmin_user'],
+        passwords
+    )
+
+    for k, v in list(console_login_creds.items()):
+        creds["console_user"][k] = v["user"]
+        creds["console_password"][k] = v["passwd"]
+
+    return creds
diff --git a/tests/common/reboot.py b/tests/common/reboot.py
index 5ca5d6e929..d70e087ed9 100644
--- a/tests/common/reboot.py
+++ b/tests/common/reboot.py
@@ -11,7 +11,9 @@
 from .platform.interface_utils import check_interface_status_of_up_ports
 from .platform.processes_utils import wait_critical_processes
 from .utilities import wait_until, get_plt_reboot_ctrl
-from tests.common.helpers.dut_utils import ignore_t2_syslog_msgs
+from tests.common.helpers.dut_utils import ignore_t2_syslog_msgs, create_duthost_console, creds_on_dut
+from tests.common.fixtures.conn_graph_facts import get_graph_facts
+
 
 logger = logging.getLogger(__name__)
 
@@ -188,7 +190,8 @@ def wait_for_startup(duthost, localhost, delay, timeout):
                              timeout=timeout,
                              module_ignore_errors=True)
     if res.is_failed or ('msg' in res and 'Timeout' in res['msg']):
-        raise Exception('DUT {} did not startup'.format(hostname))
+        collect_mgmt_config_by_console(duthost, localhost)
+        raise Exception(f'DUT {hostname} did not startup. res:{res}')
 
     logger.info('ssh has started up on {}'.format(hostname))
 
@@ -262,7 +265,10 @@ def reboot(duthost, localhost, reboot_type='cold', delay=10,
     # Create a temporary file in tmpfs before reboot
     logger.info('DUT {} create a file /dev/shm/test_reboot before rebooting'.format(hostname))
     duthost.command('sudo touch /dev/shm/test_reboot')
-
+    wait_console_connection = 5
+    console_thread_res = pool.apply_async(
+        collect_console_log, args=(duthost, localhost, timeout + wait_console_connection))
+    time.sleep(wait_console_connection)
     reboot_res, dut_datetime = perform_reboot(duthost, pool, reboot_command, reboot_helper, reboot_kwargs, reboot_type)
 
     wait_for_shutdown(duthost, localhost, delay, timeout, reboot_res)
@@ -273,7 +279,12 @@ def reboot(duthost, localhost, reboot_type='cold', delay=10,
     # if wait_for_ssh flag is False, do not wait for dut to boot up
     if not wait_for_ssh:
         return
-    wait_for_startup(duthost, localhost, delay, timeout)
+    try:
+        wait_for_startup(duthost, localhost, delay, timeout)
+    except Exception as err:
+        logger.error('collecting console log thread result: {} on {}'.format(console_thread_res.get(), hostname))
+        pool.terminate()
+        raise Exception(f"dut not start:{err}") from err
 
     logger.info('waiting for switch {} to initialize'.format(hostname))
 
@@ -493,3 +504,44 @@ def check_reboot_cause_history(dut, reboot_type_history_queue):
         logger.error("The number of expected reboot-cause:{} is more than that of actual reboot-cuase:{}".format(
             reboot_type_history_len, len(reboot_type_history_queue)))
         return False
+
+
+def try_create_dut_console(duthost, localhost, conn_graph_facts, creds):
+    try:
+        dut_console = create_duthost_console(duthost, localhost, conn_graph_facts, creds)
+    except Exception as err:
+        logger.warning(f"Fail to create dut console. Please check console config or if console works or not. {err}")
+        return None
+    logger.info("creating dut console succeeds")
+    return dut_console
+
+
+def collect_console_log(duthost, localhost, timeout):
+    logger.info("start: collect console log")
+    creds = creds_on_dut(duthost)
+    conn_graph_facts = get_graph_facts(duthost, localhost, [duthost.hostname])
+    dut_console = try_create_dut_console(duthost, localhost, conn_graph_facts, creds)
+    if dut_console:
+        logger.info(f"sleep {timeout} to collect console log....")
+        time.sleep(timeout)
+        dut_console.disconnect()
+        logger.info('end: collect console log')
+    else:
+        logger.warning("dut console is not ready, we cannot get log by console")
+
+
+def collect_mgmt_config_by_console(duthost, localhost):
+    logger.info("check if dut is pingable")
+    localhost.shell(f"ping -c 5 {duthost.mgmt_ip}", module_ignore_errors=True)
+
+    logger.info("Start: collect mgmt config by console")
+    creds = creds_on_dut(duthost)
+    conn_graph_facts = get_graph_facts(duthost, localhost, [duthost.hostname])
+    dut_console = try_create_dut_console(duthost, localhost, conn_graph_facts, creds)
+    if dut_console:
+        dut_console.send_command("ip a s eth0")
+        dut_console.send_command("show ip int")
+        dut_console.disconnect()
+        logger.info('End: collect mgmt config by console ...')
+    else:
+        logger.warning("dut console is not ready, we cannot get mgmt config by console")
diff --git a/tests/conftest.py b/tests/conftest.py
index df412fbd41..caccdf7bc7 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,16 +1,13 @@
 import concurrent.futures
 import os
-import glob
 import json
 import logging
 import getpass
 import random
-import re
 from concurrent.futures import as_completed
 
 import pytest
 import yaml
-import jinja2
 import copy
 
 from datetime import datetime
@@ -47,11 +44,9 @@
 from tests.common.utilities import get_test_server_host
 from tests.common.utilities import str2bool
 from tests.common.utilities import safe_filename
-from tests.common.utilities import get_dut_current_passwd
-from tests.common.helpers.dut_utils import is_supervisor_node, is_frontend_node
+from tests.common.helpers.dut_utils import is_supervisor_node, is_frontend_node, create_duthost_console, creds_on_dut
 from tests.common.cache import FactsCache
 from tests.common.config_reload import config_reload
-from tests.common.connections.console_host import ConsoleHost
 from tests.common.helpers.assertions import pytest_assert as pt_assert
 from tests.common.helpers.sonic_db import AsicDbCli
 from tests.common.helpers.inventory_utils import trim_inventory
@@ -778,77 +773,6 @@ def pdu():
     return pdu
 
 
-def creds_on_dut(duthost):
-    """ read credential information according to the dut inventory """
-    groups = duthost.host.options['inventory_manager'].get_host(duthost.hostname).get_vars()['group_names']
-    groups.append("fanout")
-    logger.info("dut {} belongs to groups {}".format(duthost.hostname, groups))
-    exclude_regex_patterns = [
-        r'topo_.*\.yml',
-        r'breakout_speed\.yml',
-        r'lag_fanout_ports_test_vars\.yml',
-        r'qos\.yml',
-        r'sku-sensors-data\.yml',
-        r'mux_simulator_http_port_map\.yml'
-    ]
-    files = glob.glob("../ansible/group_vars/all/*.yml")
-    files += glob.glob("../ansible/vars/*.yml")
-    for group in groups:
-        files += glob.glob("../ansible/group_vars/{}/*.yml".format(group))
-    filtered_files = [
-        f for f in files if not re.search('|'.join(exclude_regex_patterns), f)
-    ]
-
-    creds = {}
-    for f in filtered_files:
-        with open(f) as stream:
-            v = yaml.safe_load(stream)
-            if v is not None:
-                creds.update(v)
-            else:
-                logging.info("skip empty var file {}".format(f))
-
-    cred_vars = [
-        "sonicadmin_user",
-        "sonicadmin_password",
-        "docker_registry_host",
-        "docker_registry_username",
-        "docker_registry_password",
-        "public_docker_registry_host"
-    ]
-    hostvars = duthost.host.options['variable_manager']._hostvars[duthost.hostname]
-    for cred_var in cred_vars:
-        if cred_var in creds:
-            creds[cred_var] = jinja2.Template(creds[cred_var]).render(**hostvars)
-    # load creds for console
-    if "console_login" not in list(hostvars.keys()):
-        console_login_creds = {}
-    else:
-        console_login_creds = hostvars["console_login"]
-    creds["console_user"] = {}
-    creds["console_password"] = {}
-
-    creds["ansible_altpasswords"] = []
-
-    # If ansible_altpasswords is empty, add ansible_altpassword to it
-    if len(creds["ansible_altpasswords"]) == 0:
-        creds["ansible_altpasswords"].append(hostvars["ansible_altpassword"])
-
-    passwords = creds["ansible_altpasswords"] + [creds["sonicadmin_password"]]
-    creds['sonicadmin_password'] = get_dut_current_passwd(
-        duthost.mgmt_ip,
-        duthost.mgmt_ipv6,
-        creds['sonicadmin_user'],
-        passwords
-    )
-
-    for k, v in list(console_login_creds.items()):
-        creds["console_user"][k] = v["user"]
-        creds["console_password"][k] = v["passwd"]
-
-    return creds
-
-
 @pytest.fixture(scope="session")
 def creds(duthost):
     return creds_on_dut(duthost)
@@ -1712,26 +1636,8 @@ def enum_rand_one_frontend_asic_index(request):
 @pytest.fixture(scope="module")
 def duthost_console(duthosts, enum_supervisor_dut_hostname, localhost, conn_graph_facts, creds):   # noqa F811
     duthost = duthosts[enum_supervisor_dut_hostname]
-    dut_hostname = duthost.hostname
-    console_host = conn_graph_facts['device_console_info'][dut_hostname]['ManagementIp']
-    if "/" in console_host:
-        console_host = console_host.split("/")[0]
-    console_port = conn_graph_facts['device_console_link'][dut_hostname]['ConsolePort']['peerport']
-    console_type = conn_graph_facts['device_console_link'][dut_hostname]['ConsolePort']['type']
-    console_username = conn_graph_facts['device_console_link'][dut_hostname]['ConsolePort']['proxy']
-
-    console_type = "console_" + console_type
-
-    # console password and sonic_password are lists, which may contain more than one password
-    sonicadmin_alt_password = localhost.host.options['variable_manager']._hostvars[dut_hostname].get(
-        "ansible_altpassword")
-    host = ConsoleHost(console_type=console_type,
-                       console_host=console_host,
-                       console_port=console_port,
-                       sonic_username=creds['sonicadmin_user'],
-                       sonic_password=[creds['sonicadmin_password'], sonicadmin_alt_password],
-                       console_username=console_username,
-                       console_password=creds['console_password'][console_type])
+    host = create_duthost_console(duthost, localhost, conn_graph_facts, creds)
+
     yield host
     host.disconnect()
 