From: Björn Töpel <bjorn.to...@intel.com>

This commit introduces the XDP_ATTACH bind() flag. When an XDP socket
is bound with this flag set, the socket is associated with a specific
netdev Rx queue. The idea is that XDP socket users should not have to
deal with the XSKMAP. Instead, XDP_ATTACH "attaches" an XDP socket to
a queue, and XDP programs simply use bpf_xsk_redirect to redirect XDP
frames to the attached socket.
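For illustration, a minimal userspace sketch of the intended usage
(the interface name and queue id are arbitrary examples; the UMEM
registration and ring setup that bind() requires are omitted for
brevity):

#include <linux/if_xdp.h>
#include <net/if.h>
#include <sys/socket.h>
#include <unistd.h>

/* Sketch: bind an AF_XDP socket to one Rx queue with XDP_ATTACH.
 * The usual setsockopt() calls that register a UMEM and create the
 * Rx/fill/completion rings must happen before bind(); they are left
 * out here.
 */
static int xsk_bind_attached(const char *ifname, __u32 queue_id)
{
	struct sockaddr_xdp sxdp = {0};
	int fd = socket(AF_XDP, SOCK_RAW, 0);

	if (fd < 0)
		return -1;

	sxdp.sxdp_family = AF_XDP;
	sxdp.sxdp_ifindex = if_nametoindex(ifname);
	sxdp.sxdp_queue_id = queue_id;
	sxdp.sxdp_flags = XDP_ATTACH;

	if (bind(fd, (struct sockaddr *)&sxdp, sizeof(sxdp)) < 0) {
		close(fd);
		return -1;
	}
	return fd;
}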
An XDP socket bound with this option performs better, since the BPF
program is smaller and the kernel code path executes fewer
instructions.

This commit only introduces the first part of XDP_ATTACH, namely
associating the XDP socket with a netdev Rx queue. The
bpf_xsk_redirect function will be introduced in the next commit.

Signed-off-by: Björn Töpel <bjorn.to...@intel.com>
---
 include/linux/netdevice.h   |  1 +
 include/net/xdp_sock.h      |  3 +++
 include/uapi/linux/if_xdp.h |  1 +
 net/xdp/xsk.c               | 51 +++++++++++++++++++++++++++++--------
 4 files changed, 45 insertions(+), 11 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index fc6ba71513be..a2d19af6b8dd 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -743,6 +743,7 @@ struct netdev_rx_queue {
 	struct xdp_rxq_info xdp_rxq;
 #ifdef CONFIG_XDP_SOCKETS
 	struct xdp_umem *umem;
+	struct xdp_sock *xsk;
 #endif
 } ____cacheline_aligned_in_smp;
 
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index 13acb9803a6d..13975723430c 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -61,6 +61,7 @@ struct xdp_sock {
 	struct xsk_queue *tx ____cacheline_aligned_in_smp;
 	struct list_head list;
 	bool zc;
+	bool attached;
 	/* Protects multiple processes in the control path */
 	struct mutex mutex;
 	/* Mutual exclusion of NAPI TX thread and sendmsg error paths
@@ -72,7 +73,9 @@ struct xdp_sock {
 
 struct xdp_buff;
 #ifdef CONFIG_XDP_SOCKETS
+int xsk_generic_attached_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
 int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
+int xsk_attached_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
 int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
 void xsk_flush(struct xdp_sock *xs);
 bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs);
diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h
index caed8b1614ff..bd76235c2749 100644
--- a/include/uapi/linux/if_xdp.h
+++ b/include/uapi/linux/if_xdp.h
@@ -16,6 +16,7 @@
 #define XDP_SHARED_UMEM	(1 << 0)
 #define XDP_COPY	(1 << 1) /* Force copy-mode */
 #define XDP_ZEROCOPY	(1 << 2) /* Force zero-copy mode */
+#define XDP_ATTACH	(1 << 3)
 
 struct sockaddr_xdp {
 	__u16 sxdp_family;
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index a03268454a27..08d66a22185d 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -100,17 +100,20 @@ static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
 	return err;
 }
 
-int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
+int xsk_attached_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
 {
-	u32 len;
+	u32 len = xdp->data_end - xdp->data;
+
+	return (xdp->rxq->mem.type == MEM_TYPE_ZERO_COPY) ?
+		__xsk_rcv_zc(xs, xdp, len) : __xsk_rcv(xs, xdp, len);
+}
 
+int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
+{
 	if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index)
 		return -EINVAL;
 
-	len = xdp->data_end - xdp->data;
-
-	return (xdp->rxq->mem.type == MEM_TYPE_ZERO_COPY) ?
-		__xsk_rcv_zc(xs, xdp, len) : __xsk_rcv(xs, xdp, len);
+	return xsk_attached_rcv(xs, xdp);
 }
 
 void xsk_flush(struct xdp_sock *xs)
@@ -119,7 +122,7 @@ void xsk_flush(struct xdp_sock *xs)
 	xs->sk.sk_data_ready(&xs->sk);
 }
 
-int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
+int xsk_generic_attached_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
 {
 	u32 metalen = xdp->data - xdp->data_meta;
 	u32 len = xdp->data_end - xdp->data;
@@ -127,9 +130,6 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
 	u64 addr;
 	int err;
 
-	if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index)
-		return -EINVAL;
-
 	if (!xskq_peek_addr(xs->umem->fq, &addr) ||
 	    len > xs->umem->chunk_size_nohr - XDP_PACKET_HEADROOM) {
 		xs->rx_dropped++;
@@ -152,6 +152,14 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
 	return err;
 }
 
+int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
+{
+	if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index)
+		return -EINVAL;
+
+	return xsk_generic_attached_rcv(xs, xdp);
+}
+
 void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries)
 {
 	xskq_produce_flush_addr_n(umem->cq, nb_entries);
@@ -339,6 +347,19 @@ static int xsk_init_queue(u32 entries, struct xsk_queue **queue,
 	return 0;
 }
 
+static void xsk_detach(struct xdp_sock *xs)
+{
+	if (xs->attached)
+		WRITE_ONCE(xs->dev->_rx[xs->queue_id].xsk, NULL);
+}
+
+static int xsk_attach(struct xdp_sock *xs, struct net_device *dev, u16 qid)
+{
+	xs->attached = true;
+	WRITE_ONCE(dev->_rx[qid].xsk, xs);
+	return 0;
+}
+
 static int xsk_release(struct socket *sock)
 {
 	struct sock *sk = sock->sk;
@@ -359,6 +380,7 @@ static int xsk_release(struct socket *sock)
 
 	/* Wait for driver to stop using the xdp socket. */
 	xdp_del_sk_umem(xs->umem, xs);
+	xsk_detach(xs);
 	xs->dev = NULL;
 	synchronize_net();
 	dev_put(dev);
@@ -432,7 +454,8 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
 		struct xdp_sock *umem_xs;
 		struct socket *sock;
 
-		if ((flags & XDP_COPY) || (flags & XDP_ZEROCOPY)) {
+		if ((flags & XDP_COPY) || (flags & XDP_ZEROCOPY) ||
+		    (flags & XDP_ATTACH)) {
 			/* Cannot specify flags for shared sockets. */
 			err = -EINVAL;
 			goto out_unlock;
@@ -478,6 +501,12 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
 		err = xdp_umem_assign_dev(xs->umem, dev, qid, flags);
 		if (err)
 			goto out_unlock;
+
+		if (flags & XDP_ATTACH) {
+			err = xsk_attach(xs, dev, qid);
+			if (err)
+				goto out_unlock;
+		}
 	}
 
 	xs->dev = dev;
-- 
2.19.1
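To make the "smaller BPF program" argument concrete, here is a sketch
contrasting the two models. The XSKMAP program uses the existing
bpf_redirect_map() API; the XDP_ATTACH variant assumes the
bpf_xsk_redirect helper from the next commit, whose actual signature
and helper id are defined there, so treat that half as hypothetical:

#include <linux/bpf.h>
#include "bpf_helpers.h" /* from tools/testing/selftests/bpf */

/* Today: an XSKMAP keyed by Rx queue index, plus a lookup per packet. */
struct bpf_map_def SEC("maps") xsks_map = {
	.type = BPF_MAP_TYPE_XSKMAP,
	.key_size = sizeof(int),
	.value_size = sizeof(int),
	.max_entries = 64,
};

SEC("xdp_sock")
int xdp_sock_prog(struct xdp_md *ctx)
{
	return bpf_redirect_map(&xsks_map, ctx->rx_queue_index, 0);
}

/* Hypothetical declaration only: the real helper and its id are
 * added by the next commit in the series.
 */
static int (*bpf_xsk_redirect)(struct xdp_md *ctx);

/* With XDP_ATTACH (sketch): no map at all. Presumes bpf_xsk_redirect()
 * takes only the context and returns XDP_REDIRECT when an XDP socket
 * is attached to the queue the frame arrived on.
 */
SEC("xdp_sock_attached")
int xdp_sock_attached_prog(struct xdp_md *ctx)
{
	return bpf_xsk_redirect(ctx);
}

char _license[] SEC("license") = "GPL";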