Skip to content

Commit

Permalink
Refactor watchdog test case
Browse files Browse the repository at this point in the history
 - one prerequisite: Set watchdog config variable before starting the tests
  • Loading branch information
LiaoU3 committed Apr 1, 2024
1 parent 6495dd0 commit 4d22b21
Show file tree
Hide file tree
Showing 3 changed files with 253 additions and 54 deletions.
84 changes: 56 additions & 28 deletions providers/base/bin/watchdog_config_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,29 @@
# You should have received a copy of the GNU General Public License
# along with Checkbox. If not, see <http://www.gnu.org/licenses/>.

'''
The watchdog implementation on both classic and core images no longer relies
on the watchdogd service since 20.04. With this change, watchdog/systemd-config
tests only the systemd configuration on 20.04 and later series, while keeping
the original test for prior releases.
'''

import subprocess
import argparse
import sys

from checkbox_support.snap_utils.system import on_ubuntucore
from checkbox_support.snap_utils.system import get_series


def watchdog_argparse(argv=None):
    """
    Parse the command-line options selecting which watchdog checks to run.

    :param argv: optional list of argument strings; when None (the default)
        argparse reads the arguments from sys.argv[1:]
    :return: argparse.Namespace with the boolean attributes ``check_time``
        and ``check_service``
    """
    parser = argparse.ArgumentParser(
        description="Check the watchdog configuration of the system")
    parser.add_argument(
        '-t', '--check_time', action='store_true',
        help="check the systemd watchdog timeout (RuntimeWatchdogUSec)")
    parser.add_argument(
        '-s', '--check_service', action='store_true',
        help="check the state of the watchdog.service unit")

    return parser.parse_args(argv)


def get_systemd_wdt_usec():
"""
Return value of systemd-watchdog RuntimeWatchdogUSec
Expand Down Expand Up @@ -53,38 +70,49 @@ def watchdog_service_check():


def _check_systemd_timeout(systemd_expected):
    """
    Check the systemd watchdog timeout against the expected configuration.

    :param systemd_expected: True when the systemd watchdog is expected to
        be enabled on this system
    :return: True when the observed state matches the expectation
    """
    runtime_watchdog_usec = get_systemd_wdt_usec()
    # The value is compared as a string; "0" is treated as "disabled"
    is_systemd_wdt_configured = (runtime_watchdog_usec != "0")

    if systemd_expected:
        if not is_systemd_wdt_configured:
            print("systemd watchdog should be enabled but reset timeout: "
                  "{}".format(runtime_watchdog_usec))
            return False
        print("systemd watchdog enabled, reset timeout: {}".format(
            runtime_watchdog_usec))
        return True

    if is_systemd_wdt_configured:
        print("systemd watchdog should not be enabled but reset"
              " timeout: {}".format(runtime_watchdog_usec))
        return False
    print("systemd watchdog disabled")
    return True


def _check_watchdog_service(systemd_expected):
    """
    Check the state of watchdog.service against the expected configuration.

    :param systemd_expected: True when the systemd watchdog is expected to
        be used, in which case watchdog.service must not be active
    :return: True when the observed state matches the expectation
    """
    is_wdt_service_configured = watchdog_service_check()

    if systemd_expected:
        if is_wdt_service_configured:
            print("found unexpected active watchdog.service unit")
            return False
        print("watchdog.service is not active")
        return True

    if not is_wdt_service_configured:
        print("watchdog.service unit does not report as active")
        return False
    print("watchdog.service active")
    return True


def main():
    """
    Run the watchdog configuration checks selected on the command line.

    ``--check_time`` verifies the systemd watchdog timeout and
    ``--check_service`` verifies the watchdog.service state. When neither
    option is given both checks run, which keeps a plain invocation of the
    script equivalent to the test as it was before the options existed.

    On release 20 and later, or on Ubuntu Core, the systemd watchdog is
    expected to be enabled and watchdog.service inactive; on earlier
    releases the expectation is reversed.

    :raises SystemExit: always; the exit status is 0 when every selected
        check passed and 1 otherwise
    """
    args = watchdog_argparse()

    # No option selected means "check everything" rather than passing
    # without having verified anything.
    run_all = not (args.check_time or args.check_service)

    ubuntu_version = int(get_series().split(".")[0])
    systemd_expected = ubuntu_version >= 20 or on_ubuntucore()

    # Each check reports on its own so that a failure in one of them does
    # not hide the result message of the other.
    watchdog_config_ready = True
    if args.check_time or run_all:
        if not _check_systemd_timeout(systemd_expected):
            watchdog_config_ready = False
    if args.check_service or run_all:
        if not _check_watchdog_service(systemd_expected):
            watchdog_config_ready = False

    # SystemExit(False) exits with status 0, SystemExit(True) with status 1
    raise SystemExit(not watchdog_config_ready)


if __name__ == "__main__":
    # Call main() exactly once. sys.exit() turns a returned status into the
    # process exit code and is harmless when main() raises SystemExit itself.
    sys.exit(main())
209 changes: 187 additions & 22 deletions providers/base/units/watchdog/jobs.pxu
Original file line number Diff line number Diff line change
@@ -1,43 +1,208 @@
id: watchdog/check-timeout
category_id: com.canonical.plainbox::power-management
_summary: Check the timeout of Hardware Watchdog
_description:
Check that the value of RuntimeWatchdogUSec is not 0 on OEM images.
A value of 0 means the systemd watchdog is disabled.
flags: simple
imports: from com.canonical.plainbox import manifest
requires:
manifest.has_hardware_watchdog == 'True'
image_source_and_type.source == 'oem'
command: watchdog_config_test.py --check_time


id: watchdog/check-service
category_id: com.canonical.plainbox::power-management
_summary: Check the watchdog.service is enabled or not
_description:
Check whether the watchdog.service is enabled.
The watchdog implementation on both classic and core images no longer relies
on the watchdogd service since 20.04.
flags: simple
imports: from com.canonical.plainbox import manifest
requires:
manifest.has_hardware_watchdog == 'True'
command: watchdog_config_test.py --check_service


id: watchdog/probe-module
category_id: com.canonical.plainbox::power-management
_summary: Probe the suitable module for watchdog
_description:
Probe the suitable watchdog module based on the environment variable 'WATCHDOG_TYPE' in the config file.
This job is only executed on Stock Classic images because the module isn't probed automatically.
user: root
flags: simple
imports: from com.canonical.plainbox import manifest
requires:
manifest.has_hardware_watchdog == 'True'
image_source_and_type.source == 'stock'
image_source_and_type.type == 'classic'
environ: WATCHDOG_TYPE
command:
if [[ ! -n "$WATCHDOG_TYPE" ]]; then
>&2 echo "WATCHDOG_TYPE is not available"
exit 1
fi
echo "Trying to probe '$WATCHDOG_TYPE' module"
modprobe $WATCHDOG_TYPE
if [[ "$?" -ne 0 ]]; then
>&2 echo "Unable to probe the '$WATCHDOG_TYPE' module"
exit 1
fi
lsmod | grep -q -i $WATCHDOG_TYPE
if [[ "$?" -ne 0 ]]; then
>&2 echo "Unable to find the '$WATCHDOG_TYPE' module after probing it"
exit 1
fi


id: watchdog/detect
category_id: com.canonical.plainbox::power-management
_summary: Detect presence of a Hardware Watchdog
_description:
Detect that the watchdog is present under the /sys/class/watchdog/ path and that no other type of watchdog exists
flags: simple
imports: from com.canonical.plainbox import manifest
requires: manifest.has_hardware_watchdog == 'True'
command: udev_resource.py -f WATCHDOG
requires:
manifest.has_hardware_watchdog == 'True'
environ: WATCHDOG_TYPE WATCHDOG_IDENTITY
command:
source=$(checkbox-support-image_checker -s| awk -F ": " '{print $2}')
if [[ $source == "oem" ]]; then
udev_resource.py -f WATCHDOG
elif [[ $source == "stock" ]]; then
if [ -z "$WATCHDOG_TYPE" ] || [ -z "$WATCHDOG_IDENTITY" ]; then
>&2 echo "Please define the WATCHDOG_TYPE and WATCHDOG_IDENTITY in advance"
exit 1
fi
WATCHDOGDS=$(find /sys/class/watchdog/watchdog*[0-9])
EXIT=$?
for w in $WATCHDOGDS; do
identity=$(cat "$w"/identity)
echo "Identity of $w: $identity"
if [[ "$identity" != "$WATCHDOG_IDENTITY" ]]; then
>&2 echo "Find an unmatched watchdog"
EXIT=1
fi
done
exit $EXIT
else
>&2 echo "Unrecognized image source: $source"
exit 1
fi

id: watchdog/systemd-config
_summary: Check if the hardware watchdog is properly configured
template-engine: jinja2
command: watchdog_config_test.py

id: watchdog/set-timeout
category_id: com.canonical.plainbox::power-management
_summary: Configure the timeout for Hardware Watchdog
_description:
Configure the value of RuntimeWatchdogSec
flags: simple
imports: from com.canonical.plainbox import manifest
requires: manifest.has_hardware_watchdog == 'True'

id: watchdog/trigger-system-reset-auto
depends: watchdog/systemd-config
_summary: Test that the watchdog module can trigger a system reset
requires:
manifest.has_hardware_watchdog == 'True'
image_source_and_type.source == 'stock'
depends:
watchdog/check-service
watchdog/detect
user: root
command:
sync
sleep 5
echo 1 > /proc/sys/kernel/sysrq
echo 0 > /proc/sys/kernel/panic
echo c > /proc/sysrq-trigger
flags: preserve-locale noreturn autorestart
DEFAULT_WATCHDOG="^#RuntimeWatchdogSec"
search_pattern="#RuntimeWatchdogSec=[0-9]*s*"
for i in {0..1}
do
result=`grep $DEFAULT_WATCHDOG /etc/systemd/system.conf`
if [[ -n "$result" && $i -eq 0 ]]; then
echo "Modifying the watchdog timeout"
sed -i "s/$search_pattern/RuntimeWatchdogSec=35/g" /etc/systemd/system.conf
elif [[ $i -eq 1 ]]; then
if [[ -n "$result" ]]; then
>&2 echo "Failed to set watchdog timeout"; RET=1
else
echo "Watchdog timeout has been configured, reloading configuration"
systemctl daemon-reexec; RET=$?
if [[ $RET -ne 0 ]]; then
>&2 echo "Failed to reloading configuration"
fi
fi
else
echo "Watchdog timeout is already set"
break
fi
done
exit $RET


id: watchdog/revert-timeout
category_id: com.canonical.plainbox::power-management
_summary: Restore the timeout for Hardware Watchdog
_description:
Restore the value of RuntimeWatchdogSec
flags: simple
imports: from com.canonical.plainbox import manifest
requires:
manifest.has_hardware_watchdog == 'True'
image_source_and_type.source == 'stock'
depends:
watchdog/set-timeout
user: root
command:
SET_WATCHDOG="RuntimeWatchdogSec=35"
for i in {0..1}
do
result=`grep $SET_WATCHDOG /etc/systemd/system.conf`
if [[ "$result" == "$SET_WATCHDOG" && $i -eq 0 ]]; then
echo "Modifying the watchdog timeout"
sed -i "s/$SET_WATCHDOG/#RuntimeWatchdogSec=0/g" /etc/systemd/system.conf
elif [[ $i -eq 1 ]]; then
if [[ "$result" == "$SET_WATCHDOG" ]]; then
>&2 echo "Failed to revert watchdog timeout"; RET=1
else
echo "Watchdog timeout has been configured, reloading configuration"
systemctl daemon-reexec; RET=$?
if [[ $RET -ne 0 ]]; then
>&2 echo "Failed to reloading configuration"
fi
fi
else
timeout_value=`grep RuntimeWatchdogSec /etc/systemd/system.conf | awk -F '=' {'print $2'}`
echo "The watchdog timeout is $timeout_value now, not the value we set in previouse job"
echo "No need to revert watchdog timeout"
break
fi
done
exit $RET


id: watchdog/trigger-system-reset-auto
plugin: shell
category_id: com.canonical.plainbox::power-management
_summary: Test that the watchdog module can trigger a system reset
user: root
flags: noreturn autorestart
estimated_duration: 60
depends:
watchdog/check-service
watchdog/detect
command:
sync
sleep 5
echo 1 > /proc/sys/kernel/sysrq
echo 0 > /proc/sys/kernel/panic
echo c > /proc/sysrq-trigger


id: watchdog/post-trigger-system-reset-auto
after: watchdog/trigger-system-reset-auto
plugin: shell
category_id: com.canonical.plainbox::power-management
_summary: Post watchdog reset service check
_description: Check there are no failed services after the watchdog triggered
unit: job
plugin: shell
command: failed_service_check.sh
estimated_duration: 1.0
imports: from com.canonical.plainbox import manifest
requires: manifest.has_hardware_watchdog == 'True'
requires:
manifest.has_hardware_watchdog == 'True'
depends:
watchdog/trigger-system-reset-auto
command: failed_service_check.sh
14 changes: 10 additions & 4 deletions providers/base/units/watchdog/test-plan.pxu
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
id: watchdog-full
unit: test plan
_name: Watchdog tests
_description: Watchdog tests for Ubuntu Core devices
_description: Watchdog tests
include:
nested_part:
watchdog-manual
Expand All @@ -10,15 +10,21 @@ nested_part:
id: watchdog-manual
unit: test plan
_name: Manual watchdog tests
_description: Manual watchdog tests for Ubuntu Core devices
_description: Manual watchdog tests
include:

id: watchdog-automated
unit: test plan
_name: Automated watchdog tests
_description: Automated watchdog tests for Ubuntu Core devices
_description: Automated watchdog tests
include:
watchdog/check-timeout
watchdog/check-service
watchdog/probe-module
watchdog/detect
watchdog/systemd-config
watchdog/set-timeout
watchdog/trigger-system-reset-auto
watchdog/post-trigger-system-reset-auto
watchdog/revert-timeout
bootstrap_include:
image_source_and_type

0 comments on commit 4d22b21

Please sign in to comment.