This patch only introduces the core data structures and API functions. All XDP-enabled drivers must use the API before this info can be used.
There is a need for XDP to know more about the RX-queue a given XDP frame has arrived on, for both the XDP bpf-prog and the kernel side. Instead of extending xdp_buff each time new info is needed, the patch creates a separate read-mostly struct xdp_rxq_info that contains this info. We stress that this data/cache-line is for read-only info. This is NOT for dynamic per-packet info; use data_meta for such use-cases. The performance advantage is that this info can be set up at RX-ring init time, instead of updating N members in xdp_buff. A possible (driver-level) micro-optimization is that the xdp_buff->rxq assignment could be done once per XDP/NAPI loop. The extra pointer deref only happens for programs needing access to this info (thus, no slowdown for existing use-cases). Signed-off-by: Jesper Dangaard Brouer <bro...@redhat.com> --- include/linux/filter.h | 2 ++ include/net/xdp.h | 45 +++++++++++++++++++++++++++++++++++++++++++++ net/core/Makefile | 2 +- net/core/xdp.c | 40 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 88 insertions(+), 1 deletion(-) create mode 100644 include/net/xdp.h create mode 100644 net/core/xdp.c diff --git a/include/linux/filter.h b/include/linux/filter.h index 5feb441d3dd9..111107fcace6 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -19,6 +19,7 @@ #include <linux/cryptohash.h> #include <linux/set_memory.h> +#include <net/xdp.h> #include <net/sch_generic.h> #include <uapi/linux/filter.h> @@ -496,6 +497,7 @@ struct xdp_buff { void *data_end; void *data_meta; void *data_hard_start; + struct xdp_rxq_info *rxq; }; /* Compute the linear packet data range [data, data_end) which diff --git a/include/net/xdp.h b/include/net/xdp.h new file mode 100644 index 000000000000..e4acd198fd60 --- /dev/null +++ b/include/net/xdp.h @@ -0,0 +1,45 @@ +/* include/net/xdp.h + * + * Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc. + * Released under terms in GPL version 2. See COPYING. 
+ */ +#ifndef __LINUX_NET_XDP_H__ +#define __LINUX_NET_XDP_H__ + +/** + * DOC: XDP RX-queue information + * + * The XDP RX-queue info (xdp_rxq_info) is associated with the driver + * level RX-ring queues. It is information that is specific to how + * the driver has configured a given RX-ring queue. + * + * Each xdp_buff frame received in the driver carries a (pointer) + * reference to this xdp_rxq_info structure. This provides the XDP + * data-path read-access to RX-info for both kernel and bpf-side + * (limited subset). + * + * For now, direct access is only safe while running in NAPI/softirq + * context. + * + * The driver usage API is an init, register and unregister API. + * + * The struct is not directly tied to the XDP prog. A new XDP prog + * can be attached as long as it doesn't change the underlying + * RX-ring. If the RX-ring does change significantly, the NIC driver + * naturally needs to stop the RX-ring before purging and reallocating + * memory. In that process the driver MUST call unregister (which + * also applies for driver shutdown and unload). The init and register + * API is also mandatory during RX-ring setup. 
+ */ + +struct xdp_rxq_info { + struct net_device *dev; + u32 queue_index; + u32 reg_state; +} ____cacheline_aligned; /* perf critical, avoid false-sharing */ + +void xdp_rxq_info_init(struct xdp_rxq_info *xdp_rxq); +void xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq); +void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq); + +#endif /* __LINUX_NET_XDP_H__ */ diff --git a/net/core/Makefile b/net/core/Makefile index 1fd0a9c88b1b..6dbbba8c57ae 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -11,7 +11,7 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \ neighbour.o rtnetlink.o utils.o link_watch.o filter.o \ sock_diag.o dev_ioctl.o tso.o sock_reuseport.o \ - fib_notifier.o + fib_notifier.o xdp.o obj-y += net-sysfs.o obj-$(CONFIG_PROC_FS) += net-procfs.o diff --git a/net/core/xdp.c b/net/core/xdp.c new file mode 100644 index 000000000000..a9d2dd7b1ede --- /dev/null +++ b/net/core/xdp.c @@ -0,0 +1,40 @@ +/* net/core/xdp.c + * + * Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc. + * Released under terms in GPL version 2. See COPYING. 
+ */ +#include <linux/types.h> +#include <linux/mm.h> + +#include <net/xdp.h> + +#define REG_STATE_NEW 0x0 +#define REG_STATE_REGISTRED 0x1 +#define REG_STATE_UNREGISTRED 0x2 + +void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq) +{ + xdp_rxq->reg_state = REG_STATE_UNREGISTRED; +} +EXPORT_SYMBOL_GPL(xdp_rxq_info_unreg); + +void xdp_rxq_info_init(struct xdp_rxq_info *xdp_rxq) +{ + if (xdp_rxq->reg_state == REG_STATE_REGISTRED) { + WARN(1, "Missing unregister, handled but fix driver\n"); + xdp_rxq_info_unreg(xdp_rxq); + } + memset(xdp_rxq, 0, sizeof(*xdp_rxq)); + xdp_rxq->queue_index = U32_MAX; + xdp_rxq->reg_state = REG_STATE_NEW; +} +EXPORT_SYMBOL_GPL(xdp_rxq_info_init); + +void xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq) +{ + WARN(!xdp_rxq->dev, "Missing net_device from driver"); + WARN(xdp_rxq->queue_index == U32_MAX, "Miss queue_index from driver"); + WARN(!(xdp_rxq->reg_state == REG_STATE_NEW),"API violation, miss init"); + xdp_rxq->reg_state = REG_STATE_REGISTRED; +} +EXPORT_SYMBOL_GPL(xdp_rxq_info_reg);