This helper obtains the active PID namespace from current and returns the
pid, tgid, device and namespace id as seen from that namespace, which makes
it possible to instrument a process inside a container. The device is read
from /proc/self/ns/pid because, in the future, different pid_ns files may
belong to different devices, according to the discussion between Eric
Biederman and Yonghong at the 2017 Linux Plumbers Conference.
Currently bpf_get_current_pid_tgid() is used to do pid filtering in bcc
scripts, but that helper returns the pid as seen by the root namespace,
which is fine as long as the bcc script is not executed inside a container.
When the process of interest is inside a container, pid filtering will not
work if bpf_get_current_pid_tgid() is used. The new helper addresses this
limitation by returning the pid as seen by the current namespace in which
the script is executing.

The new helper has the same use cases as bpf_get_current_pid_tgid(), and it
can also be used to do pid filtering inside a container.

For example, a bcc script using bpf_get_current_pid_tgid()
(tools/funccount.py):

	u32 pid = bpf_get_current_pid_tgid() >> 32;
	if (pid != <pid_arg_passed_in>)
		return 0;

could be modified to use bpf_get_current_pidns_info() as follows:

	struct bpf_pidns_info pidns;

	bpf_get_current_pidns_info(&pidns, sizeof(struct bpf_pidns_info));
	u32 pid = pidns.tgid;
	u32 nsid = pidns.nsid;
	if ((pid != <pid_arg_passed_in>) || (nsid != <nsid_arg_passed_in>))
		return 0;

To find out the PID namespace id of a process, you could use this command:

	$ ps -h -o pidns -p <pid_of_interest>

Or this other command:

	$ ls -Li /proc/<pid_of_interest>/ns/pid

Signed-off-by: Carlos Antonio Neira Bustos <cneirabus...@gmail.com>
---
 include/linux/bpf.h                       |  1 +
 include/uapi/linux/bpf.h                  | 24 +++++++++++-
 kernel/bpf/core.c                         |  1 +
 kernel/bpf/helpers.c                      | 64 +++++++++++++++++++++++++++++++
 kernel/trace/bpf_trace.c                  |  2 +
 samples/bpf/Makefile                      |  3 ++
 samples/bpf/trace_ns_info_user.c          | 35 +++++++++++++++++
 samples/bpf/trace_ns_info_user_kern.c     | 45 ++++++++++++++++++++++
 tools/include/uapi/linux/bpf.h            | 24 +++++++++++-
 tools/testing/selftests/bpf/bpf_helpers.h |  3 ++
 10 files changed, 200 insertions(+), 2 deletions(-)
 create mode 100644 samples/bpf/trace_ns_info_user.c
 create mode 100644 samples/bpf/trace_ns_info_user_kern.c

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index cd8790d2c6ed..3f4b999f7c99 100644
--- a/include/linux/bpf.h
+++ 
b/include/linux/bpf.h
@@ -787,6 +787,7 @@ extern const struct bpf_func_proto bpf_get_stack_proto;
 extern const struct bpf_func_proto bpf_sock_map_update_proto;
 extern const struct bpf_func_proto bpf_sock_hash_update_proto;
 extern const struct bpf_func_proto bpf_get_current_cgroup_id_proto;
+extern const struct bpf_func_proto bpf_get_current_pidns_info_proto;
 
 extern const struct bpf_func_proto bpf_get_local_storage_proto;
 
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index dd5758dc35d3..8462f9881465 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2113,6 +2113,18 @@ union bpf_attr {
  *		the shared data.
  *	Return
  *		Pointer to the local storage area.
+ *
+ * int bpf_get_current_pidns_info(struct bpf_pidns_info *pidns, u32 size_of_pidns)
+ *	Description
+ *		Copies into *pidns* the pid, tgid and namespace id as seen by
+ *		the current namespace, and the device of /proc/self/ns/pid.
+ *		*size_of_pidns* must be the size of *pidns*.
+ *
+ *		This helper is used when pid filtering is needed inside a
+ *		container, as the bpf_get_current_pid_tgid() helper always
+ *		returns the pid as seen by the root namespace.
+ *	Return
+ *		0 on success, -EINVAL on error.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -2196,7 +2208,8 @@ union bpf_attr {
 	FN(rc_keydown),			\
 	FN(skb_cgroup_id),		\
 	FN(get_current_cgroup_id),	\
-	FN(get_local_storage),
+	FN(get_local_storage),		\
+	FN(get_current_pidns_info),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -2724,4 +2737,13 @@ enum bpf_task_fd_type {
 	BPF_FD_TYPE_URETPROBE,		/* filename + offset */
 };
 
+/* The bpf_get_current_pidns_info() helper stores the following data;
+ * dev will contain the major/minor of /proc/self/ns/pid.
+ */
+struct bpf_pidns_info {
+	__u32 dev;
+	__u32 nsid;
+	__u32 tgid;
+	__u32 pid;
+};
 #endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 4d09e610777f..98ce53ce2ea6 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1796,6 +1796,7 @@ const struct bpf_func_proto bpf_sock_map_update_proto __weak;
 const struct bpf_func_proto bpf_sock_hash_update_proto __weak;
 const struct bpf_func_proto bpf_get_current_cgroup_id_proto __weak;
 const struct bpf_func_proto bpf_get_local_storage_proto __weak;
+const struct bpf_func_proto bpf_get_current_pidns_info_proto __weak;
 
 const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
 {
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 1991466b8327..d06d723b9cff 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -18,6 +18,9 @@
 #include <linux/sched.h>
 #include <linux/uidgid.h>
 #include <linux/filter.h>
+#include <linux/pid_namespace.h>
+#include <linux/major.h>
+#include <linux/stat.h>
 
 /* If kernel subsystem is allowing eBPF programs to call this function,
  * inside its own verifier_ops->get_func_proto() callback it should return
@@ -214,3 +217,64 @@ const struct bpf_func_proto bpf_get_local_storage_proto = {
 	.arg2_type	= ARG_ANYTHING,
 };
 #endif
+
+/* Fill *pidns_info* (*size* bytes) with the ids of current as seen from its
+ * active pid namespace and return 0; on failure zero it, return -EINVAL.
+ */
+BPF_CALL_2(bpf_get_current_pidns_info, struct bpf_pidns_info *, pidns_info,
+	   u32, size)
+{
+	const char *ppath = "/proc/self/ns/pid";
+	struct pid_namespace *pidns;
+	mm_segment_t oldsegfs;
+	struct kstat stat;
+	pid_t tgid;
+	pid_t pid;
+	int res;
+
+	if (unlikely(size != sizeof(struct bpf_pidns_info)))
+		goto clear;
+
+	pidns = task_active_pid_ns(current);
+	if (unlikely(!pidns))
+		goto clear;
+
+	pid = task_pid_nr_ns(current, pidns);
+	if (unlikely(!pid))
+		goto clear;
+
+	tgid = task_tgid_nr_ns(current, pidns);
+	if (unlikely(!tgid))
+		goto clear;
+
+	/* NOTE(review): vfs_stat() walks a path and may sleep, yet tracing
+	 * programs can run in atomic context; this lookup needs rework.
+	 */
+	oldsegfs = get_fs();
+	set_fs(KERNEL_DS);
+	res = vfs_stat((const char __user *)ppath, &stat);
+	set_fs(oldsegfs);
+	if (unlikely(res))
+		goto clear;
+
+	pidns_info->nsid = pidns->ns.inum;
+	pidns_info->tgid = (u32) tgid;
+	pidns_info->pid = (u32) pid;
+	pidns_info->dev = stat.dev;
+
+	return 0;
+
+clear:
+	/* Zero the caller's buffer, never the kernel's pid_namespace. */
+	memset((void *)pidns_info, 0, (size_t) size);
+	return -EINVAL;
+}
+
+const struct bpf_func_proto bpf_get_current_pidns_info_proto = {
+	.func		= bpf_get_current_pidns_info,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
+	.arg2_type	= ARG_CONST_SIZE,
+};
+
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 0ae6829804bc..f70be29e49ab 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -568,6 +568,8 @@ tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 	case BPF_FUNC_get_current_cgroup_id:
 		return &bpf_get_current_cgroup_id_proto;
 #endif
+	case BPF_FUNC_get_current_pidns_info:
+		return &bpf_get_current_pidns_info_proto;
 	default:
 		return NULL;
 	}
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index f88d5683d6ee..fdcde00554ce 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -53,6 +53,7 @@ hostprogs-y += xdpsock
 hostprogs-y += xdp_fwd
 hostprogs-y += task_fd_query
 hostprogs-y += xdp_sample_pkts
+hostprogs-y += trace_ns_info
 
 # Libbpf dependencies
 LIBBPF = $(TOOLS_PATH)/lib/bpf/libbpf.a
@@ -109,6 +110,7 @@ xdpsock-objs := xdpsock_user.o
 xdp_fwd-objs := xdp_fwd_user.o
 task_fd_query-objs := bpf_load.o task_fd_query_user.o $(TRACE_HELPERS)
 xdp_sample_pkts-objs := xdp_sample_pkts_user.o $(TRACE_HELPERS)
+trace_ns_info-objs := bpf_load.o trace_ns_info_user.o
 
 # Tell kbuild to always build the programs
 always := $(hostprogs-y)
@@ -166,6 +168,7 @@ always += xdpsock_kern.o
 always += xdp_fwd_kern.o
 always += task_fd_query_kern.o
 always += xdp_sample_pkts_kern.o
+always += trace_ns_info_user_kern.o
 
 HOSTCFLAGS += -I$(objtree)/usr/include
 HOSTCFLAGS += -I$(srctree)/tools/lib/
diff --git a/samples/bpf/trace_ns_info_user.c b/samples/bpf/trace_ns_info_user.c
new file
mode 100644
index 000000000000..e06d08db6f30
--- /dev/null
+++ b/samples/bpf/trace_ns_info_user.c
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018 Carlos Neira cneirabus...@gmail.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+
+#include <stdio.h>
+#include <linux/bpf.h>
+#include <unistd.h>
+#include "bpf/libbpf.h"
+#include "bpf_load.h"
+
+/* This code was taken verbatim from tracex1_user.c; it is used
+ * to exercise the bpf_get_current_pidns_info() helper call.
+ */
+int main(int ac, char **argv)
+{
+	FILE *f;
+	char filename[256];
+
+	snprintf(filename, sizeof(filename), "%s_user_kern.o", argv[0]);
+	printf("loading %s\n", filename);
+
+	if (load_bpf_file(filename)) {
+		printf("%s", bpf_log_buf);
+		return 1;
+	}
+
+	f = popen("taskset 1 ping localhost", "r");
+	(void) f;
+	read_trace_pipe();
+	return 0;
+}
diff --git a/samples/bpf/trace_ns_info_user_kern.c b/samples/bpf/trace_ns_info_user_kern.c
new file mode 100644
index 000000000000..ceaf3e83c8e7
--- /dev/null
+++ b/samples/bpf/trace_ns_info_user_kern.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018 Carlos Neira cneirabus...@gmail.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/version.h>
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+
+typedef __u64 u64;
+typedef __u32 u32;
+
+
+/* kprobe is NOT a stable ABI
+ * kernel functions can be removed, renamed or completely change semantics.
+ * Number of arguments and their positions can change, etc.
+ * In such case this bpf+kprobe example will no longer be meaningful
+ */
+
+/* This will call bpf_get_current_pidns_info() to display pid and ns values
+ * as seen by the current namespace; on the far left you will see the pid as
+ * seen by the root namespace.
+ */
+
+SEC("kprobe/__netif_receive_skb_core")
+int bpf_prog1(struct pt_regs *ctx)
+{
+	char fmt[] = "nsid:%u, dev: %u, pid:%u\n";
+	struct bpf_pidns_info nsinfo;
+	int ok = 0;
+
+	ok = bpf_get_current_pidns_info(&nsinfo, sizeof(nsinfo));
+	if (ok == 0)
+		bpf_trace_printk(fmt, sizeof(fmt), (u32)nsinfo.nsid,
+				 (u32) nsinfo.dev, (u32)nsinfo.pid);
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index dd5758dc35d3..8462f9881465 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -2113,6 +2113,18 @@ union bpf_attr {
  *		the shared data.
  *	Return
  *		Pointer to the local storage area.
+ *
+ * int bpf_get_current_pidns_info(struct bpf_pidns_info *pidns, u32 size_of_pidns)
+ *	Description
+ *		Copies into *pidns* the pid, tgid and namespace id as seen by
+ *		the current namespace, and the device of /proc/self/ns/pid.
+ *		*size_of_pidns* must be the size of *pidns*.
+ *
+ *		This helper is used when pid filtering is needed inside a
+ *		container, as the bpf_get_current_pid_tgid() helper always
+ *		returns the pid as seen by the root namespace.
+ *	Return
+ *		0 on success, -EINVAL on error.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -2196,7 +2208,8 @@ union bpf_attr {
 	FN(rc_keydown),			\
 	FN(skb_cgroup_id),		\
 	FN(get_current_cgroup_id),	\
-	FN(get_local_storage),
+	FN(get_local_storage),		\
+	FN(get_current_pidns_info),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -2724,4 +2737,13 @@ enum bpf_task_fd_type {
 	BPF_FD_TYPE_URETPROBE,		/* filename + offset */
 };
 
+/* The bpf_get_current_pidns_info() helper stores the following data;
+ * dev will contain the major/minor of /proc/self/ns/pid.
+ */
+struct bpf_pidns_info {
+	__u32 dev;
+	__u32 nsid;
+	__u32 tgid;
+	__u32 pid;
+};
 #endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index cb9fcfbc9307..c3060f3284e0 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -137,6 +137,9 @@ static unsigned long long (*bpf_get_current_cgroup_id)(void) =
 	(void *) BPF_FUNC_get_current_cgroup_id;
 static void *(*bpf_get_local_storage)(void *map, unsigned long long flags) =
 	(void *) BPF_FUNC_get_local_storage;
+static int (*bpf_get_current_pidns_info)(struct bpf_pidns_info *buf,
+					 unsigned int buf_size) =
+	(void *) BPF_FUNC_get_current_pidns_info;
 
 /* llvm builtin functions that eBPF C program may use to
  * emit BPF_LD_ABS and BPF_LD_IND instructions
-- 
2.11.0