Dynamic Housekeeping & Enhanced Isolation (DHEI) introduces complex runtime interactions across sysfs, the scheduler, and various kernel subsystems. There are currently no automated tests to verify the integrity of sysfs boundaries, safety guards, or SMT-aware isolation logic.
Implement a kselftest suite for DHEI to ensure functional correctness.
This includes a dedicated test script (dhei_test.sh) covering sysfs
interface accessibility, safety guard enforcement, and SMT-aware
grouping. The suite also incorporates stress-ng based pressure testing
to verify load-shedding efficiency on isolated CPUs, tick suppression
under active task load, and workqueue restriction under competitive
system pressure.

Usage: make -C tools/testing/selftests/dhei run_tests

Signed-off-by: Qiliang Yuan <[email protected]>
---
 tools/testing/selftests/Makefile          |   1 +
 tools/testing/selftests/dhei/Makefile     |   4 +
 tools/testing/selftests/dhei/dhei_test.sh | 160 ++++++++++++++++++++++++++++++
 3 files changed, 165 insertions(+)

diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 56e44a98d6a59..9d16b00623839 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -16,6 +16,7 @@ TARGETS += cpu-hotplug
 TARGETS += damon
 TARGETS += devices/error_logs
 TARGETS += devices/probe
+TARGETS += dhei
 TARGETS += dmabuf-heaps
 TARGETS += drivers/dma-buf
 TARGETS += drivers/ntsync
diff --git a/tools/testing/selftests/dhei/Makefile b/tools/testing/selftests/dhei/Makefile
new file mode 100644
index 0000000000000..a578691cc677c
--- /dev/null
+++ b/tools/testing/selftests/dhei/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
+TEST_PROGS := dhei_test.sh
+
+include ../lib.mk
diff --git a/tools/testing/selftests/dhei/dhei_test.sh b/tools/testing/selftests/dhei/dhei_test.sh
new file mode 100755
index 0000000000000..a6137c52e7132
--- /dev/null
+++ b/tools/testing/selftests/dhei/dhei_test.sh
@@ -0,0 +1,160 @@
+#!/bin/sh
+# DHEI (Dynamic Housekeeping & Enhanced Isolation) Full-Coverage Verification Script
+# Strict POSIX compliant version for reliability on all shells.
# Body of dhei_test.sh, reconstructed from the collapsed patch text.
#
# The script drives the DHEI sysfs interface under $SYSFS_BASE and checks:
#   1. all expected nodes exist,
#   2. an out-of-range housekeeping mask is rejected,
#   3. SMT-aware mode rejects a mask that splits a physical core,
#   4. local timer ticks drop on a CPU removed from every housekeeping mask,
#   5. a written mask reads back unchanged on the generic nodes,
#   6. stress-ng load stays off a CPU removed from the "domain" mask,
#   7. tick behaviour with a busy task pinned to the isolated CPU (logged only).
#
# Exit status: 0 on success, 1 on the first failed check, 4 (kselftest
# "skip") when a prerequisite for running at all is missing.

# kselftest exit code meaning "test skipped".
KSFT_SKIP=4

SYSFS_BASE="/sys/kernel/housekeeping"
ONLINE_CPUS=$(cat /sys/devices/system/cpu/online)
# Last field of the online list (e.g. "0-3,8-11" -> 11): the highest-numbered
# online CPU, which is the CPU every test below tries to isolate.
LAST_CPU=$(echo "$ONLINE_CPUS" | awk -F'[,-]' '{print $NF}')

# Every node the tests may write.  smt_aware_mode is listed first so that it
# is put back before the CPU masks are.
DHEI_NODES="smt_aware_mode timer rcu misc tick domain workqueue managed_irq kthread"

# "node value" lines captured by save_state(), replayed by restore_state().
SAVED_STATE=""
# PID of the running background stress-ng job, empty when there is none.
STRESS_PID=""

# Colors for output.  The escape byte is generated once by printf because
# whether echo expands "\033" differs from shell to shell.
GREEN=$(printf '\033[0;32m')
RED=$(printf '\033[0;31m')
NC=$(printf '\033[0m')

# Logging helpers.  $1 is the message; log_fail terminates the script with
# status 1 (the EXIT trap installed below then performs the cleanup).
log_pass() { printf '%s[OK]%s %s\n' "$GREEN" "$NC" "$1"; }
log_fail() { printf '%s[FAIL]%s %s\n' "$RED" "$NC" "$1"; exit 1; }
log_info() { printf '[INFO] %s\n' "$1"; }
log_skip() { printf '[SKIP] %s\n' "$1"; }

# Record the current content of every readable DHEI node.
save_state() {
	SAVED_STATE=$(
		for sv_node in $DHEI_NODES; do
			[ -r "$SYSFS_BASE/$sv_node" ] || continue
			printf '%s %s\n' "$sv_node" "$(cat "$SYSFS_BASE/$sv_node")"
		done
	)
}

# Write the values recorded by save_state() back to their nodes.
# NOTE(review): assumes a node accepts the same text it returns on read;
# test_generic relies on the same round trip.
restore_state() {
	[ -n "$SAVED_STATE" ] || return 0
	printf '%s\n' "$SAVED_STATE" | while read -r rs_node rs_value; do
		[ -n "$rs_value" ] || continue
		echo "$rs_value" > "$SYSFS_BASE/$rs_node" 2>/dev/null
	done
}

# EXIT handler: stop a still-running stress job and undo all sysfs changes,
# so that a failing check does not leave the machine isolated or loaded.
cleanup() {
	if [ -n "$STRESS_PID" ]; then
		kill "$STRESS_PID" 2>/dev/null
		wait "$STRESS_PID" 2>/dev/null
	fi
	restore_state
}

# Write mask $1 to each of the remaining arguments (node names) that exists.
# Write errors are ignored on purpose, as some nodes may reject a mask.
set_hk_mask() {
	hk_mask=$1
	shift
	for hk_node in "$@"; do
		[ -f "$SYSFS_BASE/$hk_node" ] || continue
		echo "$hk_mask" > "$SYSFS_BASE/$hk_node" 2>/dev/null
	done
	return 0
}

# Skip (rather than fail) when not running as root: the nodes cannot be
# written, which says nothing about the feature under test.
check_root() {
	if [ "$(id -u)" -ne 0 ]; then
		log_skip "Please run as root"
		exit "$KSFT_SKIP"
	fi
}

# Skip when the running kernel does not expose the DHEI directory at all.
check_dhei_present() {
	if [ ! -d "$SYSFS_BASE" ]; then
		log_skip "$SYSFS_BASE not present"
		exit "$KSFT_SKIP"
	fi
}

test_sysfs_structure() {
	log_info "TEST 1: Sysfs structure..."
	for node in smt_aware_mode timer rcu misc tick domain workqueue managed_irq kthread; do
		[ -f "$SYSFS_BASE/$node" ] || log_fail "Node $SYSFS_BASE/$node missing"
	done
	log_pass "All 9 DHEI sysfs nodes exist"
}

# A housekeeping mask naming only CPUs 999-1024 must be refused.
test_safety_guard() {
	log_info "TEST 2: Safety guard..."
	if echo "999-1024" > "$SYSFS_BASE/domain" 2>/dev/null; then
		log_fail "Safety guard failed: allowed isolation of all CPUs"
	fi
	log_pass "Safety guard blocked invalid mask"
}

# With smt_aware_mode enabled, a mask holding only one thread of cpu0's core
# must be refused.
test_smt_aware_mode() {
	log_info "TEST 3: SMT aware logic..."
	if [ ! -f /sys/devices/system/cpu/cpu0/topology/thread_siblings_list ]; then
		log_info "SMT not supported"
		return
	fi
	SIBLINGS=$(cat /sys/devices/system/cpu/cpu0/topology/thread_siblings_list)
	# A list without ',' or '-' names a single CPU: cpu0 has no sibling, so
	# a mask containing just cpu0 is a whole core and would be accepted.
	case "$SIBLINGS" in
	*[,-]*) ;;
	*)
		log_skip "cpu0 has no SMT sibling ($SIBLINGS)"
		return
		;;
	esac
	FIRST=$(echo "$SIBLINGS" | cut -d',' -f1 | cut -d'-' -f1)

	echo 1 > "$SYSFS_BASE/smt_aware_mode"
	if echo "$FIRST" > "$SYSFS_BASE/timer" 2>/dev/null; then
		smt_accepted=1
	else
		smt_accepted=0
	fi
	echo 0 > "$SYSFS_BASE/smt_aware_mode"

	if [ "$smt_accepted" -eq 1 ]; then
		log_fail "SMT mode failed: accepted partial core"
	fi
	log_pass "SMT mode correctly rejected partial core"
}

# Print the local timer interrupt count ("LOC:" row of /proc/interrupts) for
# LAST_CPU.  The column is located through the "CPUn" header row, since only
# online CPUs get a column and their numbering may have gaps.  Prints nothing
# when the row or the column does not exist.
get_tick_count() {
	awk -v want="CPU$LAST_CPU" '
		NR == 1 {
			# Header field i belongs to data field i + 1, because every
			# data row starts with its "NAME:" label.
			for (i = 1; i <= NF; i++)
				if ($i == want)
					col = i + 1
			next
		}
		$1 == "LOC:" {
			if (col)
				print $col
			exit
		}
	' /proc/interrupts
}

test_tick_dynamic() {
	log_info "TEST 4: Dynamic Tick toggle..."
	if [ "$LAST_CPU" -eq 0 ]; then
		log_skip "Only CPU 0 is online, nothing to isolate"
		return
	fi
	if [ -z "$(get_tick_count)" ]; then
		log_skip "No LOC: counter for CPU $LAST_CPU in /proc/interrupts"
		return
	fi

	# Reset all to full housekeeping
	set_hk_mask "$ONLINE_CPUS" tick rcu timer domain workqueue

	S1=$(get_tick_count)
	sleep 1
	S2=$(get_tick_count)
	log_info "Baseline ticks on CPU $LAST_CPU: $((S2-S1)) (per 1s)"

	# Isolate LAST_CPU by setting housekeeping for all types
	HK_MASK="0-$((LAST_CPU-1))"
	set_hk_mask "$HK_MASK" tick rcu timer domain workqueue

	sleep 1
	S1=$(get_tick_count)
	sleep 2
	S2=$(get_tick_count)
	DIFF=$((S2-S1))
	log_info "Tick delta after isolation: $DIFF (per 2s)"
	if [ "$DIFF" -gt 100 ]; then
		log_fail "Tick not suppressed ($DIFF)"
	fi
	log_pass "Tick dynamically suppressed"
}

# Each generic node must report back the mask that was written to it.
test_generic() {
	log_info "TEST 5: Notifier propagation..."
	# The probe mask "0-1" needs at least CPUs 0 and 1.
	if [ "$LAST_CPU" -lt 1 ]; then
		log_skip "Mask 0-1 needs at least two CPUs"
		return
	fi
	for t in rcu workqueue misc kthread managed_irq; do
		echo "0-1" > "$SYSFS_BASE/$t"
		[ "$(cat "$SYSFS_BASE/$t")" = "0-1" ] || log_fail "$t update failed"
		log_pass "$t verified"
	done
}

# Print the busy time of LAST_CPU from /proc/stat, in clock ticks:
# user + nice + system + irq + softirq + steal.
get_busy() {
	awk -v tag="cpu$LAST_CPU" '$1 == tag { print $2+$3+$4+$7+$8+$9 }' /proc/stat
}

test_stress_domain() {
	log_info "TEST 6: Stress Domain Isolation..."
	if ! command -v stress-ng >/dev/null 2>&1; then
		log_skip "stress-ng not installed"
		return
	fi
	if [ "$LAST_CPU" -eq 0 ]; then
		log_skip "Only CPU 0 is online, nothing to isolate"
		return
	fi

	# Keep every CPU but LAST_CPU in the domain mask (same mask as TEST 4);
	# a fixed "0-1" would contain LAST_CPU itself on a 2-CPU machine.
	echo "0-$((LAST_CPU-1))" > "$SYSFS_BASE/domain"
	stress-ng --cpu 0 --timeout 10 --quiet &
	STRESS_PID=$!
	sleep 2
	B1=$(get_busy)
	sleep 5
	B2=$(get_busy)
	DIFF=$((B2-B1))
	log_info "Busy jiffies delta: $DIFF (per 5s)"
	if [ "$DIFF" -gt 150 ]; then
		log_fail "CPU $LAST_CPU not isolated ($DIFF)"
	fi
	log_pass "Domain isolation verified under load"
	echo "$ONLINE_CPUS" > "$SYSFS_BASE/domain"
	wait "$STRESS_PID" 2>/dev/null
	STRESS_PID=""
}

# Informational only: logs tick counts and never fails.
test_stress_tick() {
	log_info "TEST 7: Stress Tick Suppression..."
	if ! command -v stress-ng >/dev/null 2>&1; then
		log_skip "stress-ng not installed"
		return
	fi
	if ! command -v taskset >/dev/null 2>&1; then
		log_skip "taskset not installed"
		return
	fi
	if [ "$LAST_CPU" -eq 0 ]; then
		log_skip "Only CPU 0 is online, nothing to isolate"
		return
	fi
	if [ -z "$(get_tick_count)" ]; then
		log_skip "No LOC: counter for CPU $LAST_CPU in /proc/interrupts"
		return
	fi

	echo "$ONLINE_CPUS" > "$SYSFS_BASE/tick"
	taskset -c "$LAST_CPU" stress-ng --cpu 1 --timeout 15 --quiet &
	STRESS_PID=$!

	sleep 2
	T1=$(get_tick_count)
	sleep 2
	T2=$(get_tick_count)
	log_info "Ticks WITH housekeeping: $((T2-T1)) (per 2s)"

	# Same mask as TEST 4: every CPU but LAST_CPU keeps the tick.
	echo "0-$((LAST_CPU-1))" > "$SYSFS_BASE/tick"
	sleep 2
	T1=$(get_tick_count)
	sleep 2
	T2=$(get_tick_count)
	DIFF_ISO=$((T2-T1))
	log_info "Ticks AFTER isolation: $DIFF_ISO (per 2s)"

	# Critical: Check if dmesg shows context tracking warnings during this test
	if [ "$DIFF_ISO" -gt 100 ]; then
		log_info "Dmesg check for tick errors..."
		dmesg | grep -i "tick" | tail -n 5
	fi

	log_pass "Tick suppression scenario logged"
	echo "$ONLINE_CPUS" > "$SYSFS_BASE/tick"
	wait "$STRESS_PID" 2>/dev/null
	STRESS_PID=""
}

check_root
check_dhei_present

# From here on the script modifies system state: remember it, and undo the
# changes on every exit path.  Signals are turned into a normal exit so that
# the EXIT handler runs for them as well.
save_state
trap cleanup EXIT
trap 'exit 1' HUP INT TERM

test_sysfs_structure
test_safety_guard
test_smt_aware_mode
test_tick_dynamic
test_generic
test_stress_domain
test_stress_tick

log_pass "DHEI Verification Complete!"

# git format-patch signature of the original submission: "-- 2.43.0"

