From: Björn Töpel <bjorn.to...@intel.com>

Another setsockopt (XDP_RX_QUEUE) is added to let the process allocate
a queue that the kernel uses to pass completed Rx frames to the user
process.

The queue is mmapped by the user process using the XDP_PGOFF_RX_QUEUE
offset.

Signed-off-by: Björn Töpel <bjorn.to...@intel.com>
---
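A minimal userspace sketch of the intended flow (illustrative only, not
part of the patch): size the Rx ring with the new XDP_RX_QUEUE
setsockopt, then map it at offset XDP_PGOFF_RX_QUEUE. The AF_XDP/SOL_XDP
constants and the socket creation come from other patches in the series
and are assumed here, as is the ring size of 128 entries.

/*
 * Illustrative only: allocate and map the Rx ring from userspace.
 * AF_XDP/SOL_XDP values are assumptions (defined elsewhere in the
 * series), and error handling is kept minimal.
 */
#include <linux/if_xdp.h>
#include <stdio.h>
#include <sys/mman.h>
#include <sys/socket.h>
#include <unistd.h>

#ifndef AF_XDP
#define AF_XDP 44		/* assumed address family */
#endif
#ifndef SOL_XDP
#define SOL_XDP 283		/* assumed socket option level */
#endif

int main(void)
{
	int entries = 128;	/* must be a power of two */
	struct xdp_rxtx_queue *rxq;
	size_t size;
	int fd;

	fd = socket(AF_XDP, SOCK_RAW, 0);
	if (fd < 0 ||
	    setsockopt(fd, SOL_XDP, XDP_RX_QUEUE, &entries, sizeof(entries))) {
		perror("XDP_RX_QUEUE");
		return 1;
	}

	/* Ring layout: head/tail indices followed by 'entries' descriptors. */
	size = sizeof(struct xdp_queue) + entries * sizeof(struct xdp_desc);
	rxq = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
		   fd, XDP_PGOFF_RX_QUEUE);
	if (rxq == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	/* ... consume rxq->desc[] as the kernel advances rxq->ptrs ... */

	munmap(rxq, size);
	close(fd);
	return 0;
}

Note that xsk_init_queue() rejects an entry count that is zero or not a
power of two, and xsk_mmap() fails the mapping if the requested size
exceeds the ring's backing allocation.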
 include/uapi/linux/if_xdp.h | 16 ++++++++++++++++
 net/xdp/xsk.c               | 42 +++++++++++++++++++++++++++++++++---------
 net/xdp/xsk_queue.c         | 11 ++++++++---
 net/xdp/xsk_queue.h         | 11 ++++++++++-
 4 files changed, 67 insertions(+), 13 deletions(-)

diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h
index 0de1bbf2c5c7..118456064e01 100644
--- a/include/uapi/linux/if_xdp.h
+++ b/include/uapi/linux/if_xdp.h
@@ -21,6 +21,7 @@
 #include <linux/types.h>
 
 /* XDP socket options */
+#define XDP_RX_QUEUE			1
 #define XDP_UMEM_REG			3
 #define XDP_UMEM_FILL_QUEUE		4
 
@@ -32,13 +33,28 @@ struct xdp_umem_reg {
 };
 
 /* Pgoff for mmaping the rings */
+#define XDP_PGOFF_RX_QUEUE		  0
 #define XDP_UMEM_PGOFF_FILL_QUEUE	  0x100000000
 
+struct xdp_desc {
+	__u32 idx;
+	__u32 len;
+	__u16 offset;
+	__u8 flags;
+	__u8 padding[5];
+};
+
 struct xdp_queue {
 	__u32 head_idx __attribute__((aligned(64)));
 	__u32 tail_idx __attribute__((aligned(64)));
 };
 
+/* Used for the RX and TX queues for packets */
+struct xdp_rxtx_queue {
+	struct xdp_queue ptrs;
+	struct xdp_desc desc[0] __attribute__((aligned(64)));
+};
+
 /* Used for the fill and completion queues for buffers */
 struct xdp_umem_queue {
 	struct xdp_queue ptrs;
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 6ff1d1f3322f..f82f750cadc2 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -38,6 +38,8 @@ struct xdp_sock {
 	/* struct sock must be the first member of struct xdp_sock */
 	struct sock sk;
+	struct xsk_queue *rx;
+	struct net_device *dev;
 	/* Protects multiple processes in the control path */
 	struct mutex mutex;
 	struct xdp_umem *umem;
@@ -48,14 +50,15 @@ static struct xdp_sock *xdp_sk(struct sock *sk)
 	return (struct xdp_sock *)sk;
 }
 
-static int xsk_init_queue(u32 entries, struct xsk_queue **queue)
+static int xsk_init_queue(u32 entries, struct xsk_queue **queue,
+			  bool umem_queue)
 {
 	struct xsk_queue *q;
 
 	if (entries == 0 || *queue || !is_power_of_2(entries))
 		return -EINVAL;
 
-	q = xskq_create(entries);
+	q = xskq_create(entries, umem_queue);
 	if (!q)
 		return -ENOMEM;
 
@@ -97,6 +100,22 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
 		return -ENOPROTOOPT;
 
 	switch (optname) {
+	case XDP_RX_QUEUE:
+	{
+		struct xsk_queue **q;
+		int entries;
+
+		if (optlen < sizeof(entries))
+			return -EINVAL;
+		if (copy_from_user(&entries, optval, sizeof(entries)))
+			return -EFAULT;
+
+		mutex_lock(&xs->mutex);
+		q = &xs->rx;
+		err = xsk_init_queue(entries, q, false);
+		mutex_unlock(&xs->mutex);
+		return err;
+	}
 	case XDP_UMEM_REG:
 	{
 		struct xdp_umem_reg mr;
@@ -138,7 +157,7 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
 
 		mutex_lock(&xs->mutex);
 		q = &xs->umem->fq;
-		err = xsk_init_queue(entries, q);
+		err = xsk_init_queue(entries, q, true);
 		mutex_unlock(&xs->mutex);
 		return err;
 	}
@@ -160,13 +179,17 @@ static int xsk_mmap(struct file *file, struct socket *sock,
 	struct page *qpg;
 	int err;
 
-	if (!xs->umem)
-		return -EINVAL;
+	if (offset == XDP_PGOFF_RX_QUEUE) {
+		q = xs->rx;
+	} else {
+		if (!xs->umem)
+			return -EINVAL;
 
-	if (offset == XDP_UMEM_PGOFF_FILL_QUEUE)
-		q = xs->umem->fq;
-	else
-		return -EINVAL;
+		if (offset == XDP_UMEM_PGOFF_FILL_QUEUE)
+			q = xs->umem->fq;
+		else
+			return -EINVAL;
+	}
 
 	qpg = virt_to_head_page(q->ring);
 	if (size > (PAGE_SIZE << compound_order(qpg)))
@@ -213,6 +236,7 @@ static void xsk_destruct(struct sock *sk)
 	if (!sock_flag(sk, SOCK_DEAD))
 		return;
 
+	xskq_destroy(xs->rx);
 	xdp_put_umem(xs->umem);
 
 	sk_refcnt_debug_dec(sk);
diff --git a/net/xdp/xsk_queue.c b/net/xdp/xsk_queue.c
index fd4bb06aa112..3ce6ef350850 100644
--- a/net/xdp/xsk_queue.c
+++ b/net/xdp/xsk_queue.c
@@ -16,7 +16,7 @@
 
 #include "xsk_queue.h"
 
-struct xsk_queue *xskq_create(u32 nentries)
+struct xsk_queue *xskq_create(u32 nentries, bool umem_queue)
 {
 	struct xsk_queue *q;
 	gfp_t gfp_flags;
@@ -31,8 +31,13 @@ struct xsk_queue *xskq_create(u32 nentries)
 	gfp_flags = GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN |
 		    __GFP_COMP | __GFP_NORETRY;
 
-	size = xskq_umem_get_ring_size(q);
-	q->validation = XSK_VALIDATION_RX;
+	if (umem_queue) {
+		size = xskq_umem_get_ring_size(q);
+		q->validation = XSK_VALIDATION_RX;
+	} else {
+		size = xskq_rxtx_get_ring_size(q);
+		q->validation = XSK_VALIDATION_TX;
+	}
 
 	q->ring = (struct xdp_queue *)__get_free_pages(gfp_flags,
 						       get_order(size));
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index fe845a20c153..d79b613a9e0a 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -22,6 +22,7 @@
 
 enum xsk_validation {
 	XSK_VALIDATION_RX,	/* Only address to packet buffer validated */
+	XSK_VALIDATION_TX	/* Full descriptor is validated */
 };
 
 struct xsk_queue {
@@ -36,6 +37,14 @@ struct xsk_queue {
 	u64 invalid_descs;
 };
 
+/* Functions operating on RXTX queues only */
+
+static inline u32 xskq_rxtx_get_ring_size(struct xsk_queue *q)
+{
+	return (sizeof(struct xdp_queue) +
+		q->nentries * sizeof(struct xdp_desc));
+}
+
 /* Functions operating on UMEM queues only */
 
 static inline u32 xskq_umem_get_ring_size(struct xsk_queue *q)
@@ -43,7 +52,7 @@ static inline u32 xskq_umem_get_ring_size(struct xsk_queue *q)
 	return sizeof(struct xdp_umem_queue) + q->nentries * sizeof(u32);
 }
 
-struct xsk_queue *xskq_create(u32 nentries);
+struct xsk_queue *xskq_create(u32 nentries, bool umem_queue);
 void xskq_destroy(struct xsk_queue *q_ops);
 
 #endif /* _LINUX_XDP_QUEUE_H */
-- 
2.14.1