tl;dr: I'm trying to get past clang optimizations so that the verifier accepts my proggie.
Hi, I'm moving to use raw_syscalls:sys_exit to collect pointer contents, using maps to tell the bpf program what to copy, how many bytes, filters, etc. I'm just at the start of it: at this point I need to use an index to get to the right syscall arg that is a filename, starting with just "open" and "openat", which have the filename in different args. To get this first part working I'm doing it directly in the bpf restricted C program; later this will move to maps, etc. If I set the index to a constant, just for testing, it works. Look at the "open" and "openat" calls below; later we'll see why openat is failing to augment its "filename" arg while "open" works: [root@seventh perf]# trace -e tools/perf/examples/bpf/augmented_raw_syscalls.c sleep 1 ? ( ): sleep/10152 ... [continued]: execve()) = 0 0.045 ( 0.004 ms): sleep/10152 brk() = 0x55ccff356000 0.074 ( 0.007 ms): sleep/10152 access(filename: , mode: R) = -1 ENOENT No such file or directory 0.089 ( 0.006 ms): sleep/10152 openat(dfd: CWD, filename: , flags: CLOEXEC) = 3 0.097 ( 0.003 ms): sleep/10152 fstat(fd: 3, statbuf: 0x7ffecdd283f0) = 0 0.103 ( 0.006 ms): sleep/10152 mmap(len: 103334, prot: READ, flags: PRIVATE, fd: 3) = 0x7f8ffee9c000 0.111 ( 0.002 ms): sleep/10152 close(fd: 3) = 0 0.135 ( 0.007 ms): sleep/10152 openat(dfd: CWD, filename: , flags: CLOEXEC) = 3 0.144 ( 0.003 ms): sleep/10152 read(fd: 3, buf: 0x7ffecdd285b8, count: 832) = 832 0.150 ( 0.002 ms): sleep/10152 fstat(fd: 3, statbuf: 0x7ffecdd28450) = 0 0.155 ( 0.005 ms): sleep/10152 mmap(len: 8192, prot: READ|WRITE, flags: PRIVATE|ANONYMOUS) = 0x7f8ffee9a000 0.166 ( 0.007 ms): sleep/10152 mmap(len: 3889792, prot: EXEC|READ, flags: PRIVATE|DENYWRITE, fd: 3) = 0x7f8ffe8dc000 0.175 ( 0.010 ms): sleep/10152 mprotect(start: 0x7f8ffea89000, len: 2093056) = 0 0.188 ( 0.010 ms): sleep/10152 mmap(addr: 0x7f8ffec88000, len: 24576, prot: READ|WRITE, flags: PRIVATE|FIXED|DENYWRITE, fd: 3, off: 1753088) = 0x7f8ffec88000 0.204 ( 0.005 ms): sleep/10152 mmap(addr: 
0x7f8ffec8e000, len: 14976, prot: READ|WRITE, flags: PRIVATE|FIXED|ANONYMOUS) = 0x7f8ffec8e000 0.218 ( 0.002 ms): sleep/10152 close(fd: 3) = 0 0.239 ( 0.002 ms): sleep/10152 arch_prctl(option: 4098, arg2: 140256433779968) = 0 0.312 ( 0.009 ms): sleep/10152 mprotect(start: 0x7f8ffec88000, len: 16384, prot: READ) = 0 0.343 ( 0.005 ms): sleep/10152 mprotect(start: 0x55ccff1c6000, len: 4096, prot: READ) = 0 0.354 ( 0.006 ms): sleep/10152 mprotect(start: 0x7f8ffeeb6000, len: 4096, prot: READ) = 0 0.362 ( 0.019 ms): sleep/10152 munmap(addr: 0x7f8ffee9c000, len: 103334) = 0 0.476 ( 0.002 ms): sleep/10152 brk() = 0x55ccff356000 0.480 ( 0.004 ms): sleep/10152 brk(brk: 0x55ccff377000) = 0x55ccff377000 0.487 ( 0.002 ms): sleep/10152 brk() = 0x55ccff377000 0.497 ( 0.008 ms): sleep/10152 open(filename: /usr/lib/locale/locale-archive, flags: CLOEXEC) = 3 0.507 ( 0.002 ms): sleep/10152 fstat(fd: 3, statbuf: 0x7f8ffec8daa0) = 0 0.511 ( 0.006 ms): sleep/10152 mmap(len: 113045344, prot: READ, flags: PRIVATE, fd: 3) = 0x7f8ff7d0d000 0.524 ( 0.002 ms): sleep/10152 close(fd: 3) = 0 0.574 (1000.140 ms): sleep/10152 nanosleep(rqtp: 0x7ffecdd29130) = 0 1000.753 ( 0.007 ms): sleep/10152 close(fd: 1) = 0 1000.767 ( 0.004 ms): sleep/10152 close(fd: 2) = 0 1000.781 ( ): sleep/10152 exit_group() [root@seventh perf]# 1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Augment the raw_syscalls tracepoints with the contents of the pointer arguments. 4 * 5 * Test it with: 6 * 7 * perf trace -e tools/perf/examples/bpf/augmented_raw_syscalls.c cat /etc/passwd > /dev/null 8 * 9 * This exactly matches what is marshalled into the raw_syscall:sys_enter 10 * payload expected by the 'perf trace' beautifiers. 11 * 12 * For now it just uses the existing tracepoint augmentation code in 'perf 13 * trace', in the next csets we'll hook up these with the sys_enter/sys_exit 14 * code that will combine entry/exit in a strace like way. 
15 */ 16 #include <stdio.h> 17 #include <linux/socket.h> 18 /* bpf-output associated map */ 19 struct bpf_map SEC("maps") __augmented_syscalls__ = { 20 .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, 21 .key_size = sizeof(int), 22 .value_size = sizeof(u32), 23 .max_entries = __NR_CPUS__, 24 }; 25 struct syscall_enter_args { 26 unsigned long long common_tp_fields; 27 long syscall_nr; 28 unsigned long args[6]; 29 }; 30 struct syscall_exit_args { 31 unsigned long long common_tp_fields; 32 long syscall_nr; 33 long ret; 34 }; 35 struct augmented_filename { 36 unsigned int size; 37 int reserved; 38 char value[256]; 39 }; 40 #define SYS_OPEN 2 41 #define SYS_OPENAT 257 42 SEC("raw_syscalls:sys_enter") 43 int sys_enter(struct syscall_enter_args *args) 44 { 45 struct { 46 struct syscall_enter_args args; 47 struct augmented_filename filename; 48 } augmented_args; 49 unsigned int len = sizeof(augmented_args); 50 unsigned int filename_arg = 6; 51 probe_read(&augmented_args.args, sizeof(augmented_args.args), args); 52 switch (augmented_args.args.syscall_nr) { 53 case SYS_OPEN: filename_arg = 0; break; 54 case SYS_OPENAT: filename_arg = 1; break; 55 } 56 if (filename_arg <= 5) { 57 augmented_args.filename.reserved = 0; 58 augmented_args.filename.size = probe_read_str(&augmented_args.filename.value, 59 sizeof(augmented_args.filename.value), 60 (const void *)args->args[0]); 61 if (augmented_args.filename.size < sizeof(augmented_args.filename.value)) { 62 len -= sizeof(augmented_args.filename.value) - augmented_args.filename.size; 63 len &= sizeof(augmented_args.filename.value) - 1; 64 } 65 } else { 66 len = sizeof(augmented_args.args); 67 } 68 perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, &augmented_args, len); 69 return 0; 70 } 71 SEC("raw_syscalls:sys_exit") 72 int sys_exit(struct syscall_exit_args *args) 73 { 74 return 1; /* 0 as soon as we start copying data returned by the kernel, e.g. 
'read' */ 75 } 76 license(GPL); If I change the index in line #60 to 1, then "openat" works and "open" doesn't. What I wanted was to use filename_arg there as the index; for now it comes from that switch, but eventually it'll come from userspace, which knows the syscall tables for each arch, etc. But if I do that, i.e. apply this patch to that program: --- /wb/augmented_raw_syscalls.c.old 2018-11-01 15:43:55.000394234 -0300 +++ /wb/augmented_raw_syscalls.c 2018-11-01 15:44:15.102367838 -0300 @@ -67,7 +67,7 @@ augmented_args.filename.reserved = 0; augmented_args.filename.size = probe_read_str(&augmented_args.filename.value, sizeof(augmented_args.filename.value), - (const void *)args->args[0]); + (const void *)args->args[filename_arg]); if (augmented_args.filename.size < sizeof(augmented_args.filename.value)) { len -= sizeof(augmented_args.filename.value) - augmented_args.filename.size; len &= sizeof(augmented_args.filename.value) - 1; Then I end up with the verifier complaining. I tried various ways to work around the compiler so that filename_arg is seen as safe to use as an index, but I couldn't find the right trick. Ideas? This is what I end up with when I apply that patch: [root@seventh perf]# trace -e tools/perf/examples/bpf/augmented_raw_syscalls.c sleep 1 event syntax error: 'tools/perf/examples/bpf/augmented_raw_syscalls.c' \___ Kernel verifier blocks program loading (add -v to see detail) Run 'perf list' for a list of valid events Usage: perf trace [<options>] [<command>] or: perf trace [<options>] -- <command> [<options>] or: perf trace record [<options>] [<command>] or: perf trace record [<options>] -- <command> [<options>] -e, --event <event> event/syscall selector. 
use 'perf list' to list available events [root@seventh perf]# Using -v, as suggested, I get: [root@seventh perf]# trace -v -e tools/perf/examples/bpf/augmented_raw_syscalls.c sleep 1 bpf: builtin compilation failed: -95, try external compiler Kernel build dir is set to /lib/modules/4.19.0-rc8-00014-gc0cff31be705/build set env: KBUILD_DIR=/lib/modules/4.19.0-rc8-00014-gc0cff31be705/build unset env: KBUILD_OPTS include option is set to -nostdinc -isystem /usr/lib/gcc/x86_64-redhat-linux/7/include -I/home/acme/git/linux/arch/x86/include -I./arch/x86/include/generated -I/home/acme/git/linux/include -I./include -I/home/acme/git/linux/arch/x86/include/uapi -I./arch/x86/include/generated/uapi -I/home/acme/git/linux/include/uapi -I./include/generated/uapi -include /home/acme/git/linux/include/linux/kconfig.h set env: NR_CPUS=4 set env: LINUX_VERSION_CODE=0x41300 set env: CLANG_EXEC=/usr/local/bin/clang unset env: CLANG_OPTIONS set env: KERNEL_INC_OPTIONS= -nostdinc -isystem /usr/lib/gcc/x86_64-redhat-linux/7/include -I/home/acme/git/linux/arch/x86/include -I./arch/x86/include/generated -I/home/acme/git/linux/include -I./include -I/home/acme/git/linux/arch/x86/include/uapi -I./arch/x86/include/generated/uapi -I/home/acme/git/linux/include/uapi -I./include/generated/uapi -include /home/acme/git/linux/include/linux/kconfig.h set env: PERF_BPF_INC_OPTIONS=-I/home/acme/lib/perf/include/bpf set env: WORKING_DIR=/lib/modules/4.19.0-rc8-00014-gc0cff31be705/build set env: CLANG_SOURCE=/home/acme/git/perf/tools/perf/examples/bpf/augmented_raw_syscalls.c llvm compiling command template: $CLANG_EXEC -D__KERNEL__ -D__NR_CPUS__=$NR_CPUS -DLINUX_VERSION_CODE=$LINUX_VERSION_CODE $CLANG_OPTIONS $PERF_BPF_INC_OPTIONS $KERNEL_INC_OPTIONS -Wno-unused-value -Wno-pointer-sign -working-directory $WORKING_DIR -c "$CLANG_SOURCE" -target bpf $CLANG_EMIT_LLVM -O2 -o - $LLVM_OPTIONS_PIPE llvm compiling command : /usr/local/bin/clang -D__KERNEL__ -D__NR_CPUS__=4 -DLINUX_VERSION_CODE=0x41300 
-I/home/acme/lib/perf/include/bpf -nostdinc -isystem /usr/lib/gcc/x86_64-redhat-linux/7/include -I/home/acme/git/linux/arch/x86/include -I./arch/x86/include/generated -I/home/acme/git/linux/include -I./include -I/home/acme/git/linux/arch/x86/include/uapi -I./arch/x86/include/generated/uapi -I/home/acme/git/linux/include/uapi -I./include/generated/uapi -include /home/acme/git/linux/include/linux/kconfig.h -Wno-unused-value -Wno-pointer-sign -working-directory /lib/modules/4.19.0-rc8-00014-gc0cff31be705/build -c /home/acme/git/perf/tools/perf/examples/bpf/augmented_raw_syscalls.c -target bpf -O2 -o - libbpf: loading object 'tools/perf/examples/bpf/augmented_raw_syscalls.c' from buffer libbpf: section(1) .strtab, size 168, link 0, flags 0, type=3 libbpf: skip section(1) .strtab libbpf: section(2) .text, size 0, link 0, flags 6, type=1 libbpf: skip section(2) .text libbpf: section(3) raw_syscalls:sys_enter, size 376, link 0, flags 6, type=1 libbpf: found program raw_syscalls:sys_enter libbpf: section(4) .relraw_syscalls:sys_enter, size 16, link 10, flags 0, type=9 libbpf: section(5) raw_syscalls:sys_exit, size 16, link 0, flags 6, type=1 libbpf: found program raw_syscalls:sys_exit libbpf: section(6) maps, size 56, link 0, flags 3, type=1 libbpf: section(7) license, size 4, link 0, flags 3, type=1 libbpf: license of tools/perf/examples/bpf/augmented_raw_syscalls.c is GPL libbpf: section(8) version, size 4, link 0, flags 3, type=1 libbpf: kernel version of tools/perf/examples/bpf/augmented_raw_syscalls.c is 41300 libbpf: section(9) .llvm_addrsig, size 6, link 10, flags 80000000, type=1879002115 libbpf: skip section(9) .llvm_addrsig libbpf: section(10) .symtab, size 240, link 1, flags 0, type=2 libbpf: maps in tools/perf/examples/bpf/augmented_raw_syscalls.c: 2 maps in 56 bytes libbpf: map 0 is "__augmented_syscalls__" libbpf: map 1 is "__bpf_stdout__" libbpf: collecting relocating info for: 'raw_syscalls:sys_enter' libbpf: relo for 4 value 28 name 124 libbpf: relocation: 
insn_idx=39 libbpf: relocation: find map 1 (__augmented_syscalls__) for insn 39 bpf: config program 'raw_syscalls:sys_enter' bpf: config program 'raw_syscalls:sys_exit' libbpf: create map __bpf_stdout__: fd=3 libbpf: create map __augmented_syscalls__: fd=4 libbpf: load bpf program failed: Permission denied libbpf: -- BEGIN DUMP LOG --- libbpf: 0: (bf) r6 = r1 1: (bf) r1 = r10 2: (07) r1 += -328 3: (b7) r7 = 64 4: (b7) r2 = 64 5: (bf) r3 = r6 6: (85) call bpf_probe_read#4 7: (b7) r2 = 1 8: (79) r3 = *(u64 *)(r10 -320) 9: (15) if r3 == 0x101 goto pc+1 R0=inv(id=0) R2=inv1 R3=inv(id=0) R6=ctx(id=0,off=0,imm=0) R7=inv64 R10=fp0,call_-1 10: (b7) r2 = 6 11: (b7) r1 = 0 12: (15) if r3 == 0x2 goto pc+1 R0=inv(id=0) R1=inv0 R2=inv6 R3=inv(id=0) R6=ctx(id=0,off=0,imm=0) R7=inv64 R10=fp0,call_-1 13: (bf) r1 = r2 14: (25) if r1 > 0x5 goto pc+21 R0=inv(id=0) R1=inv6 R2=inv6 R3=inv(id=0) R6=ctx(id=0,off=0,imm=0) R7=inv64 R10=fp0,call_-1 15: (b7) r2 = 0 16: (63) *(u32 *)(r10 -260) = r2 17: (67) r1 <<= 32 18: (77) r1 >>= 32 19: (67) r1 <<= 3 20: (bf) r2 = r6 21: (0f) r2 += r1 22: (79) r3 = *(u64 *)(r2 +16) R2 invalid mem access 'inv' libbpf: -- END LOG -- libbpf: failed to load program 'raw_syscalls:sys_enter' libbpf: failed to load object 'tools/perf/examples/bpf/augmented_raw_syscalls.c' bpf: load objects failed: err=-4007: (Kernel verifier blocks program loading) event syntax error: 'tools/perf/examples/bpf/augmented_raw_syscalls.c' \___ Kernel verifier blocks program loading (add -v to see detail) Run 'perf list' for a list of valid events Usage: perf trace [<options>] [<command>] or: perf trace [<options>] -- <command> [<options>] or: perf trace record [<options>] [<command>] or: perf trace record [<options>] -- <command> [<options>] -e, --event <event> event/syscall selector. use 'perf list' to list available events [root@seventh perf]#