tl;dr: I seem to be trying to get past clang optimizations in order to
       get the verifier to accept my proggie.

Hi,

        So I'm moving to use raw_syscalls:sys_exit to collect pointer
contents, using maps to tell the bpf program what to copy, how many
bytes, filters, etc.

        I'm just at the start of it. At this point I need to use an index
to get to the right syscall arg that is a filename, starting just with
"open" and "openat", which have the filename in different args. To get
this first part working I'm doing it directly in the bpf restricted C
program; later this will be moved to maps, etc. If I set the index as a
constant, just for testing, it works: look at the "open" and "openat"
calls below. Later we'll see why openat is failing to augment its
"filename" arg while "open" works:

[root@seventh perf]# trace -e tools/perf/examples/bpf/augmented_raw_syscalls.c 
sleep 1
         ? (         ): sleep/10152  ... [continued]: execve()) = 0
     0.045 ( 0.004 ms): sleep/10152 brk() = 0x55ccff356000
     0.074 ( 0.007 ms): sleep/10152 access(filename: , mode: R) = -1 ENOENT No 
such file or directory
     0.089 ( 0.006 ms): sleep/10152 openat(dfd: CWD, filename: , flags: 
CLOEXEC) = 3
     0.097 ( 0.003 ms): sleep/10152 fstat(fd: 3, statbuf: 0x7ffecdd283f0) = 0
     0.103 ( 0.006 ms): sleep/10152 mmap(len: 103334, prot: READ, flags: 
PRIVATE, fd: 3) = 0x7f8ffee9c000
     0.111 ( 0.002 ms): sleep/10152 close(fd: 3) = 0
     0.135 ( 0.007 ms): sleep/10152 openat(dfd: CWD, filename: , flags: 
CLOEXEC) = 3
     0.144 ( 0.003 ms): sleep/10152 read(fd: 3, buf: 0x7ffecdd285b8, count: 
832) = 832
     0.150 ( 0.002 ms): sleep/10152 fstat(fd: 3, statbuf: 0x7ffecdd28450) = 0
     0.155 ( 0.005 ms): sleep/10152 mmap(len: 8192, prot: READ|WRITE, flags: 
PRIVATE|ANONYMOUS) = 0x7f8ffee9a000
     0.166 ( 0.007 ms): sleep/10152 mmap(len: 3889792, prot: EXEC|READ, flags: 
PRIVATE|DENYWRITE, fd: 3) = 0x7f8ffe8dc000
     0.175 ( 0.010 ms): sleep/10152 mprotect(start: 0x7f8ffea89000, len: 
2093056) = 0
     0.188 ( 0.010 ms): sleep/10152 mmap(addr: 0x7f8ffec88000, len: 24576, 
prot: READ|WRITE, flags: PRIVATE|FIXED|DENYWRITE, fd: 3, off: 1753088) = 
0x7f8ffec88000
     0.204 ( 0.005 ms): sleep/10152 mmap(addr: 0x7f8ffec8e000, len: 14976, 
prot: READ|WRITE, flags: PRIVATE|FIXED|ANONYMOUS) = 0x7f8ffec8e000
     0.218 ( 0.002 ms): sleep/10152 close(fd: 3) = 0
     0.239 ( 0.002 ms): sleep/10152 arch_prctl(option: 4098, arg2: 
140256433779968) = 0
     0.312 ( 0.009 ms): sleep/10152 mprotect(start: 0x7f8ffec88000, len: 16384, 
prot: READ) = 0
     0.343 ( 0.005 ms): sleep/10152 mprotect(start: 0x55ccff1c6000, len: 4096, 
prot: READ) = 0
     0.354 ( 0.006 ms): sleep/10152 mprotect(start: 0x7f8ffeeb6000, len: 4096, 
prot: READ) = 0
     0.362 ( 0.019 ms): sleep/10152 munmap(addr: 0x7f8ffee9c000, len: 103334) = 0
     0.476 ( 0.002 ms): sleep/10152 brk() = 0x55ccff356000
     0.480 ( 0.004 ms): sleep/10152 brk(brk: 0x55ccff377000) = 0x55ccff377000
     0.487 ( 0.002 ms): sleep/10152 brk() = 0x55ccff377000
     0.497 ( 0.008 ms): sleep/10152 open(filename: 
/usr/lib/locale/locale-archive, flags: CLOEXEC) = 3
     0.507 ( 0.002 ms): sleep/10152 fstat(fd: 3, statbuf: 0x7f8ffec8daa0) = 0
     0.511 ( 0.006 ms): sleep/10152 mmap(len: 113045344, prot: READ, flags: 
PRIVATE, fd: 3) = 0x7f8ff7d0d000
     0.524 ( 0.002 ms): sleep/10152 close(fd: 3) = 0
     0.574 (1000.140 ms): sleep/10152 nanosleep(rqtp: 0x7ffecdd29130) = 0
  1000.753 ( 0.007 ms): sleep/10152 close(fd: 1) = 0
  1000.767 ( 0.004 ms): sleep/10152 close(fd: 2) = 0
  1000.781 (         ): sleep/10152 exit_group()
[root@seventh perf]# 

     1  // SPDX-License-Identifier: GPL-2.0
     2  /*
     3   * Augment the raw_syscalls tracepoints with the contents of the 
pointer arguments.
     4   *
     5   * Test it with:
     6   *
     7   * perf trace -e tools/perf/examples/bpf/augmented_raw_syscalls.c cat 
/etc/passwd > /dev/null
     8   *
     9   * This exactly matches what is marshalled into the 
raw_syscall:sys_enter
    10   * payload expected by the 'perf trace' beautifiers.
    11   *
    12   * For now it just uses the existing tracepoint augmentation code in 
'perf
    13   * trace', in the next csets we'll hook up these with the 
sys_enter/sys_exit
    14   * code that will combine entry/exit in a strace like way.
    15   */
       
    16  #include <stdio.h>
    17  #include <linux/socket.h>
       
    18  /* bpf-output associated map */
    19  struct bpf_map SEC("maps") __augmented_syscalls__ = {
    20          .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
    21          .key_size = sizeof(int),
    22          .value_size = sizeof(u32),
    23          .max_entries = __NR_CPUS__,
    24  };
       
    25  struct syscall_enter_args {
    26          unsigned long long common_tp_fields;
    27          long               syscall_nr;
    28          unsigned long      args[6];
    29  };
       
    30  struct syscall_exit_args {
    31          unsigned long long common_tp_fields;
    32          long               syscall_nr;
    33          long               ret;
    34  };
       
    35  struct augmented_filename {
    36          unsigned int    size;
    37          int             reserved;
    38          char            value[256];
    39  };
       
    40  #define SYS_OPEN 2
    41  #define SYS_OPENAT 257
       
    42  SEC("raw_syscalls:sys_enter")
    43  int sys_enter(struct syscall_enter_args *args)
    44  {
    45          struct {
    46                  struct syscall_enter_args args;
    47                  struct augmented_filename filename;
    48          } augmented_args;
    49          unsigned int len = sizeof(augmented_args);
    50          unsigned int filename_arg = 6;
       
    51          probe_read(&augmented_args.args, sizeof(augmented_args.args), 
args);
       
    52          switch (augmented_args.args.syscall_nr) {
    53          case SYS_OPEN:   filename_arg = 0; break;
    54          case SYS_OPENAT: filename_arg = 1; break;
    55          }
       
    56          if (filename_arg <= 5) {
    57                  augmented_args.filename.reserved = 0;
    58                  augmented_args.filename.size = 
probe_read_str(&augmented_args.filename.value,
    59                                                                
sizeof(augmented_args.filename.value),
    60                                                                (const 
void *)args->args[0]);
    61                  if (augmented_args.filename.size < 
sizeof(augmented_args.filename.value)) {
    62                          len -= sizeof(augmented_args.filename.value) - 
augmented_args.filename.size;
    63                          len &= sizeof(augmented_args.filename.value) - 
1;
    64                  }
    65          } else {
    66                  len = sizeof(augmented_args.args);
    67          }
       
    68          perf_event_output(args, &__augmented_syscalls__, 
BPF_F_CURRENT_CPU, &augmented_args, len);
    69          return 0;
    70  }
       
    71  SEC("raw_syscalls:sys_exit")
    72  int sys_exit(struct syscall_exit_args *args)
    73  {
    74          return 1; /* 0 as soon as we start copying data returned by the 
kernel, e.g. 'read' */
    75  }
       
    76  license(GPL);

In line #60, if I change that index from 0 to 1, then "openat" works and
"open" doesn't. What I wanted was to use filename_arg there as the index;
for now it comes from that switch, but really it'll come from userspace,
which knows the syscall tables for each arch, etc.

But if I do that, i.e. apply this patch to that program:

--- /wb/augmented_raw_syscalls.c.old    2018-11-01 15:43:55.000394234 -0300
+++ /wb/augmented_raw_syscalls.c        2018-11-01 15:44:15.102367838 -0300
@@ -67,7 +67,7 @@
                augmented_args.filename.reserved = 0;
                augmented_args.filename.size = 
probe_read_str(&augmented_args.filename.value,
                                                              
sizeof(augmented_args.filename.value),
-                                                             (const void 
*)args->args[0]);
+                                                             (const void 
*)args->args[filename_arg]);
                if (augmented_args.filename.size < 
sizeof(augmented_args.filename.value)) {
                        len -= sizeof(augmented_args.filename.value) - 
augmented_args.filename.size;
                        len &= sizeof(augmented_args.filename.value) - 1;

Then I end up with the verifier complaining. I tried various ways to get
around the compiler and show that filename_arg is safe to use as an index,
but I couldn't find the right trick. Ideas?

This is what I end up with when I apply that patch:

[root@seventh perf]# trace -e tools/perf/examples/bpf/augmented_raw_syscalls.c 
sleep 1
event syntax error: 'tools/perf/examples/bpf/augmented_raw_syscalls.c'
                     \___ Kernel verifier blocks program loading

(add -v to see detail)
Run 'perf list' for a list of valid events

 Usage: perf trace [<options>] [<command>]
    or: perf trace [<options>] -- <command> [<options>]
    or: perf trace record [<options>] [<command>]
    or: perf trace record [<options>] -- <command> [<options>]

    -e, --event <event>   event/syscall selector. use 'perf list' to list 
available events
[root@seventh perf]# 

Using -v, as suggested, I get:

[root@seventh perf]# trace -v -e 
tools/perf/examples/bpf/augmented_raw_syscalls.c sleep 1
bpf: builtin compilation failed: -95, try external compiler
Kernel build dir is set to /lib/modules/4.19.0-rc8-00014-gc0cff31be705/build
set env: KBUILD_DIR=/lib/modules/4.19.0-rc8-00014-gc0cff31be705/build
unset env: KBUILD_OPTS
include option is set to  -nostdinc -isystem 
/usr/lib/gcc/x86_64-redhat-linux/7/include 
-I/home/acme/git/linux/arch/x86/include -I./arch/x86/include/generated  
-I/home/acme/git/linux/include -I./include 
-I/home/acme/git/linux/arch/x86/include/uapi 
-I./arch/x86/include/generated/uapi -I/home/acme/git/linux/include/uapi 
-I./include/generated/uapi -include 
/home/acme/git/linux/include/linux/kconfig.h 
set env: NR_CPUS=4
set env: LINUX_VERSION_CODE=0x41300
set env: CLANG_EXEC=/usr/local/bin/clang
unset env: CLANG_OPTIONS
set env: KERNEL_INC_OPTIONS= -nostdinc -isystem 
/usr/lib/gcc/x86_64-redhat-linux/7/include 
-I/home/acme/git/linux/arch/x86/include -I./arch/x86/include/generated  
-I/home/acme/git/linux/include -I./include 
-I/home/acme/git/linux/arch/x86/include/uapi 
-I./arch/x86/include/generated/uapi -I/home/acme/git/linux/include/uapi 
-I./include/generated/uapi -include 
/home/acme/git/linux/include/linux/kconfig.h 
set env: PERF_BPF_INC_OPTIONS=-I/home/acme/lib/perf/include/bpf
set env: WORKING_DIR=/lib/modules/4.19.0-rc8-00014-gc0cff31be705/build
set env: 
CLANG_SOURCE=/home/acme/git/perf/tools/perf/examples/bpf/augmented_raw_syscalls.c
llvm compiling command template: $CLANG_EXEC -D__KERNEL__ 
-D__NR_CPUS__=$NR_CPUS -DLINUX_VERSION_CODE=$LINUX_VERSION_CODE $CLANG_OPTIONS 
$PERF_BPF_INC_OPTIONS $KERNEL_INC_OPTIONS -Wno-unused-value -Wno-pointer-sign 
-working-directory $WORKING_DIR -c "$CLANG_SOURCE" -target bpf $CLANG_EMIT_LLVM 
-O2 -o - $LLVM_OPTIONS_PIPE
llvm compiling command : /usr/local/bin/clang -D__KERNEL__ -D__NR_CPUS__=4 
-DLINUX_VERSION_CODE=0x41300  -I/home/acme/lib/perf/include/bpf  -nostdinc 
-isystem /usr/lib/gcc/x86_64-redhat-linux/7/include 
-I/home/acme/git/linux/arch/x86/include -I./arch/x86/include/generated  
-I/home/acme/git/linux/include -I./include 
-I/home/acme/git/linux/arch/x86/include/uapi 
-I./arch/x86/include/generated/uapi -I/home/acme/git/linux/include/uapi 
-I./include/generated/uapi -include 
/home/acme/git/linux/include/linux/kconfig.h  -Wno-unused-value 
-Wno-pointer-sign -working-directory 
/lib/modules/4.19.0-rc8-00014-gc0cff31be705/build -c 
/home/acme/git/perf/tools/perf/examples/bpf/augmented_raw_syscalls.c -target 
bpf  -O2 -o - 
libbpf: loading object 'tools/perf/examples/bpf/augmented_raw_syscalls.c' from 
buffer
libbpf: section(1) .strtab, size 168, link 0, flags 0, type=3
libbpf: skip section(1) .strtab
libbpf: section(2) .text, size 0, link 0, flags 6, type=1
libbpf: skip section(2) .text
libbpf: section(3) raw_syscalls:sys_enter, size 376, link 0, flags 6, type=1
libbpf: found program raw_syscalls:sys_enter
libbpf: section(4) .relraw_syscalls:sys_enter, size 16, link 10, flags 0, type=9
libbpf: section(5) raw_syscalls:sys_exit, size 16, link 0, flags 6, type=1
libbpf: found program raw_syscalls:sys_exit
libbpf: section(6) maps, size 56, link 0, flags 3, type=1
libbpf: section(7) license, size 4, link 0, flags 3, type=1
libbpf: license of tools/perf/examples/bpf/augmented_raw_syscalls.c is GPL
libbpf: section(8) version, size 4, link 0, flags 3, type=1
libbpf: kernel version of tools/perf/examples/bpf/augmented_raw_syscalls.c is 
41300
libbpf: section(9) .llvm_addrsig, size 6, link 10, flags 80000000, 
type=1879002115
libbpf: skip section(9) .llvm_addrsig
libbpf: section(10) .symtab, size 240, link 1, flags 0, type=2
libbpf: maps in tools/perf/examples/bpf/augmented_raw_syscalls.c: 2 maps in 56 
bytes
libbpf: map 0 is "__augmented_syscalls__"
libbpf: map 1 is "__bpf_stdout__"
libbpf: collecting relocating info for: 'raw_syscalls:sys_enter'
libbpf: relo for 4 value 28 name 124
libbpf: relocation: insn_idx=39
libbpf: relocation: find map 1 (__augmented_syscalls__) for insn 39
bpf: config program 'raw_syscalls:sys_enter'
bpf: config program 'raw_syscalls:sys_exit'
libbpf: create map __bpf_stdout__: fd=3
libbpf: create map __augmented_syscalls__: fd=4
libbpf: load bpf program failed: Permission denied
libbpf: -- BEGIN DUMP LOG ---
libbpf: 
0: (bf) r6 = r1
1: (bf) r1 = r10
2: (07) r1 += -328
3: (b7) r7 = 64
4: (b7) r2 = 64
5: (bf) r3 = r6
6: (85) call bpf_probe_read#4
7: (b7) r2 = 1
8: (79) r3 = *(u64 *)(r10 -320)
9: (15) if r3 == 0x101 goto pc+1
 R0=inv(id=0) R2=inv1 R3=inv(id=0) R6=ctx(id=0,off=0,imm=0) R7=inv64 
R10=fp0,call_-1
10: (b7) r2 = 6
11: (b7) r1 = 0
12: (15) if r3 == 0x2 goto pc+1
 R0=inv(id=0) R1=inv0 R2=inv6 R3=inv(id=0) R6=ctx(id=0,off=0,imm=0) R7=inv64 
R10=fp0,call_-1
13: (bf) r1 = r2
14: (25) if r1 > 0x5 goto pc+21
 R0=inv(id=0) R1=inv6 R2=inv6 R3=inv(id=0) R6=ctx(id=0,off=0,imm=0) R7=inv64 
R10=fp0,call_-1
15: (b7) r2 = 0
16: (63) *(u32 *)(r10 -260) = r2
17: (67) r1 <<= 32
18: (77) r1 >>= 32
19: (67) r1 <<= 3
20: (bf) r2 = r6
21: (0f) r2 += r1
22: (79) r3 = *(u64 *)(r2 +16)
R2 invalid mem access 'inv'

libbpf: -- END LOG --
libbpf: failed to load program 'raw_syscalls:sys_enter'
libbpf: failed to load object 'tools/perf/examples/bpf/augmented_raw_syscalls.c'
bpf: load objects failed: err=-4007: (Kernel verifier blocks program loading)
event syntax error: 'tools/perf/examples/bpf/augmented_raw_syscalls.c'
                     \___ Kernel verifier blocks program loading

(add -v to see detail)
Run 'perf list' for a list of valid events

 Usage: perf trace [<options>] [<command>]
    or: perf trace [<options>] -- <command> [<options>]
    or: perf trace record [<options>] [<command>]
    or: perf trace record [<options>] -- <command> [<options>]

    -e, --event <event>   event/syscall selector. use 'perf list' to list 
available events
[root@seventh perf]# 

Reply via email to