When trying to exhaust system memory in order to exercise LMEM eviction
under OOM conditions, a gem_leak helper process may itself become a victim
of memory shortage.  If our i915 TTM VM fault handler fails to allocate a
page and responds with a SIGBUS signal when the helper process is trying
to store data in an mmapped i915 GEM object with memset, then the process
crashes.  Unfortunately, such a crash is not only reported on stdout, stderr
and dmesg as a premature, additional result from the subtest while it is
still in progress, but also renders the final result as failed.

Starting subtest: smem-oom
Starting dynamic subtest: lmem0
Received signal SIGBUS.
Stack trace:
 #0 [fatal_sig_handler+0x17b]
 #1 [__sigaction+0x50]
 #2 [__igt_unique____real_main808+0xdbc]
 #3 [main+0x3f]
 #4 [__libc_init_first+0x8a]
 #5 [__libc_start_main+0x8b]
 #6 [_start+0x25]
Dynamic subtest lmem0: CRASH (20.804s)
Subtest smem-oom: SUCCESS (20.807s)
Received signal SIGABRT.
Stack trace:
 #0 [fatal_sig_handler+0x17b]
 #1 [__sigaction+0x50]
 #2 [pthread_kill+0x11c]
 #3 [gsignal+0x1e]
 #4 [abort+0xdf]
 #5 [<unknown>+0xdf]
 #6 [__assert_fail+0x47]
 #7 [__igt_waitchildren+0x1c0]
 #8 [igt_waitchildren_timeout+0x9d]
 #9 [intel_allocator_multiprocess_stop+0xbb]
 #10 [__igt_unique____real_main808+0x551]
 #11 [main+0x3f]
 #12 [__libc_init_first+0x8a]
 #13 [__libc_start_main+0x8b]
 #14 [_start+0x25]
(gem_lmem_swapping:2347) CRITICAL: Test assertion failure function test_smem_oom, file ../tests/intel/gem_lmem_swapping.c:777:
(gem_lmem_swapping:2347) CRITICAL: Failed assertion: lmem_err == 0
(gem_lmem_swapping:2347) CRITICAL: Last errno: 3, No such process
(gem_lmem_swapping:2347) CRITICAL: error: 137 != 0
Dynamic subtest lmem0 failed.
...
runner: Dynamic subtest lmem0 result when not inside a subtest. This is a test bug.
Subtest smem-oom: FAIL (22.672s)

Since page allocation failures are unavoidable under OOM conditions, and
the SIGBUS signal response from our TTM fault handler is correct in such
cases, catch those signals and let the helper process continue.

Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/5493
Signed-off-by: Janusz Krzysztofik <[email protected]>
---
This is an improved and better-documented new version of my earlier patch:
https://patchwork.freedesktop.org/patch/685572/

 tests/intel/gem_lmem_swapping.c | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/tests/intel/gem_lmem_swapping.c b/tests/intel/gem_lmem_swapping.c
index 77e18f1a3c..514423f470 100644
--- a/tests/intel/gem_lmem_swapping.c
+++ b/tests/intel/gem_lmem_swapping.c
@@ -11,6 +11,8 @@
 #include "igt_kmod.h"
 #include "runnercomms.h"
 #include <unistd.h>
+#include <setjmp.h>
+#include <signal.h>
 #include <stdlib.h>
 #include <stdint.h>
 #include <stdio.h>
@@ -651,13 +653,21 @@ static void leak(uint64_t alloc)
        }
 }
 
+static sigjmp_buf sigbus_jmp;
+
+static void sigbus_handler(int sig, siginfo_t *si, void *ctx)
+{
+       siglongjmp(sigbus_jmp, 1);
+}
+
 static void gem_leak(int fd, uint64_t alloc)
 {
        uint32_t handle = gem_create(fd, alloc);
        void *buf;
 
        buf = gem_mmap_offset__fixed(fd, handle, 0, PAGE_SIZE, PROT_WRITE);
-       memset(buf, 0, PAGE_SIZE);
+       if (!igt_debug_on_f(sigsetjmp(sigbus_jmp, 1), "PID %d: SIGBUS 
caught\n", getpid()))
+               memset(buf, 0, PAGE_SIZE);
        munmap(buf, PAGE_SIZE);
 
        gem_madvise(fd, handle, I915_MADV_DONTNEED);
@@ -745,8 +755,14 @@ static void test_smem_oom(int i915,
                                }
                        }
                        igt_fork(child, 1) {
+                               struct sigaction sa = {
+                                       .sa_sigaction = sigbus_handler,
+                                       .sa_flags = SA_SIGINFO | SA_NODEFER,
+                               };
                                int fd = drm_reopen_driver(i915);
 
+                               sigaction(SIGBUS, &sa, NULL);
+
                                for (int pass = 0; pass < num_alloc; pass++) {
                                        if (READ_ONCE(*lmem_done))
                                                break;
-- 
2.52.0

Reply via email to