On Wed, Oct 09, 2019 at 09:15:03PM -0700, Alexei Starovoitov wrote:
> Load basic cls_bpf program.
> Load raw_tracepoint program and attach to kfree_skb raw tracepoint.
> Trigger cls_bpf via prog_test_run.
> At the end of test_run kernel will call kfree_skb
> which will trigger trace_kfree_skb tracepoint.
> Which will call our raw_tracepoint program.
> Which will take that skb and will dump it into perf ring buffer.
> Check that user space received correct packet.
> 
> Signed-off-by: Alexei Starovoitov <a...@kernel.org>
> Acked-by: Andrii Nakryiko <andr...@fb.com>
> ---
>  .../selftests/bpf/prog_tests/kfree_skb.c      | 90 +++++++++++++++++++
>  tools/testing/selftests/bpf/progs/kfree_skb.c | 74 +++++++++++++++
>  2 files changed, 164 insertions(+)
>  create mode 100644 tools/testing/selftests/bpf/prog_tests/kfree_skb.c
>  create mode 100644 tools/testing/selftests/bpf/progs/kfree_skb.c
> 
> diff --git a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c 
> b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c
> new file mode 100644
> index 000000000000..0cf91b6bf276
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c
> @@ -0,0 +1,90 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#include <test_progs.h>
> +
> +static void on_sample(void *ctx, int cpu, void *data, __u32 size)
> +{
> +     int ifindex = *(int *)data, duration = 0;
> +     struct ipv6_packet *pkt_v6 = data + 4;
> +
> +     if (ifindex != 1)
> +             /* spurious kfree_skb not on loopback device */
> +             return;
> +     if (CHECK(size != 76, "check_size", "size %u != 76\n", size))
> +             return;
> +     if (CHECK(pkt_v6->eth.h_proto != 0xdd86, "check_eth",
> +               "h_proto %x\n", pkt_v6->eth.h_proto))
> +             return;
> +     if (CHECK(pkt_v6->iph.nexthdr != 6, "check_ip",
> +               "iph.nexthdr %x\n", pkt_v6->iph.nexthdr))
> +             return;
> +     if (CHECK(pkt_v6->tcp.doff != 5, "check_tcp",
> +               "tcp.doff %x\n", pkt_v6->tcp.doff))
> +             return;
> +
> +     *(bool *)ctx = true;
> +}
> +
> +void test_kfree_skb(void)
> +{
> +     struct bpf_prog_load_attr attr = {
> +             .file = "./kfree_skb.o",
> +             .log_level = 2,
> +     };
> +
> +     struct bpf_object *obj, *obj2 = NULL;
> +     struct perf_buffer_opts pb_opts = {};
> +     struct perf_buffer *pb = NULL;
> +     struct bpf_link *link = NULL;
> +     struct bpf_map *perf_buf_map;
> +     struct bpf_program *prog;
> +     __u32 duration, retval;
> +     int err, pkt_fd, kfree_skb_fd;
> +     bool passed = false;
> +
> +     err = bpf_prog_load("./test_pkt_access.o", BPF_PROG_TYPE_SCHED_CLS, 
> &obj, &pkt_fd);
> +     if (CHECK(err, "prog_load sched cls", "err %d errno %d\n", err, errno))
> +             return;
> +
> +     err = bpf_prog_load_xattr(&attr, &obj2, &kfree_skb_fd);
> +     if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno))
> +             goto close_prog;
> +
> +     prog = bpf_object__find_program_by_title(obj2, 
> "raw_tracepoint/kfree_skb");
> +     if (CHECK(!prog, "find_prog", "prog kfree_skb not found\n"))
> +             goto close_prog;
> +     link = bpf_program__attach_raw_tracepoint(prog, NULL);
> +     if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n", PTR_ERR(link)))
> +             goto close_prog;
> +
> +     perf_buf_map = bpf_object__find_map_by_name(obj2, "perf_buf_map");
> +     if (CHECK(!perf_buf_map, "find_perf_buf_map", "not found\n"))
> +             goto close_prog;
> +
> +     /* set up perf buffer */
> +     pb_opts.sample_cb = on_sample;
> +     pb_opts.ctx = &passed;
> +     pb = perf_buffer__new(bpf_map__fd(perf_buf_map), 1, &pb_opts);
> +     if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb)))
> +             goto close_prog;
> +
> +     err = bpf_prog_test_run(pkt_fd, 1, &pkt_v6, sizeof(pkt_v6),
> +                             NULL, NULL, &retval, &duration);
> +     CHECK(err || retval, "ipv6",
> +           "err %d errno %d retval %d duration %d\n",
> +           err, errno, retval, duration);
> +
> +     /* read perf buffer */
> +     err = perf_buffer__poll(pb, 100);
> +     if (CHECK(err < 0, "perf_buffer__poll", "err %d\n", err))
> +             goto close_prog;
> +     /* make sure kfree_skb program was triggered
> +      * and it sent expected skb into ring buffer
> +      */
> +     CHECK_FAIL(!passed);
> +close_prog:
> +     perf_buffer__free(pb);
> +     if (!IS_ERR_OR_NULL(link))
> +             bpf_link__destroy(link);
> +     bpf_object__close(obj);
> +     bpf_object__close(obj2);
> +}
> diff --git a/tools/testing/selftests/bpf/progs/kfree_skb.c 
> b/tools/testing/selftests/bpf/progs/kfree_skb.c
> new file mode 100644
> index 000000000000..fc25797cc64d
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/progs/kfree_skb.c
> @@ -0,0 +1,74 @@
> +// SPDX-License-Identifier: GPL-2.0
> +// Copyright (c) 2019 Facebook
> +#include <linux/bpf.h>
> +#include "bpf_helpers.h"
> +
> +char _license[] SEC("license") = "GPL";
> +struct {
> +     __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
> +     __uint(key_size, sizeof(int));
> +     __uint(value_size, sizeof(int));
> +} perf_buf_map SEC(".maps");
> +
> +#define _(P) (__builtin_preserve_access_index(P))
> +
> +/* define few struct-s that bpf program needs to access */
> +struct callback_head {
> +     struct callback_head *next;
> +     void (*func)(struct callback_head *head);
> +};
> +struct dev_ifalias {
> +     struct callback_head rcuhead;
> +};
> +
> +struct net_device /* same as kernel's struct net_device */ {
> +     int ifindex;
> +     struct dev_ifalias *ifalias;
> +};
> +
> +struct sk_buff {
> +     /* field names and sizes should match to those in the kernel */
> +     unsigned int len, data_len;
> +     __u16 mac_len, hdr_len, queue_mapping;
> +     struct net_device *dev;
> +     /* order of the fields doesn't matter */
> +};
> +
> +/* copy arguments from
> + * include/trace/events/skb.h:
> + * TRACE_EVENT(kfree_skb,
> + *         TP_PROTO(struct sk_buff *skb, void *location),
> + *
> + * into struct below:
> + */
> +struct trace_kfree_skb {
> +     struct sk_buff *skb;
> +     void *location;
> +};
> +
> +SEC("raw_tracepoint/kfree_skb")
> +int trace_kfree_skb(struct trace_kfree_skb *ctx)
> +{
> +     struct sk_buff *skb = ctx->skb;
> +     struct net_device *dev;
> +     int ifindex;
> +     struct callback_head *ptr;
> +     void *func;
> +
> +     __builtin_preserve_access_index(({
> +             dev = skb->dev;
> +             ifindex = dev->ifindex;

Hi Alexei,

The patchset looks very useful. One question: Is it always safe to
access 'skb->dev->ifindex' here? I'm asking because 'dev' is inside a
union with 'dev_scratch' which is 'unsigned long' and therefore might
not always be a valid memory address. Consider for example the following
code path:

...
__udp_queue_rcv_skb(sk, skb)
        __udp_enqueue_schedule_skb(sk, skb)
                udp_set_dev_scratch(skb)
                // returns error
        ...
        kfree_skb(skb) // ebpf program is invoked

How is this handled by eBPF?

Thanks

> +             ptr = dev->ifalias->rcuhead.next;
> +             func = ptr->func;
> +     }));
> +
> +     bpf_printk("rcuhead.next %llx func %llx\n", ptr, func);
> +     bpf_printk("skb->len %d\n", _(skb->len));
> +     bpf_printk("skb->queue_mapping %d\n", _(skb->queue_mapping));
> +     bpf_printk("dev->ifindex %d\n", ifindex);
> +
> +     /* send first 72 byte of the packet to user space */
> +     bpf_skb_output(skb, &perf_buf_map, (72ull << 32) | BPF_F_CURRENT_CPU,
> +                    &ifindex, sizeof(ifindex));
> +     return 0;
> +}
> -- 
> 2.23.0
> 

Reply via email to