From: Björn Töpel <bjorn.to...@intel.com>

Here the actual receive functions of AF_XDP are implemented, which, in
a later commit, will be called from the XDP layers.
There's one set of functions for the XDP_DRV side and another for
XDP_SKB (generic).

Support for the poll syscall is also implemented.

Signed-off-by: Björn Töpel <bjorn.to...@intel.com>
---
 net/xdp/xdp_umem.h  |  18 +++++
 net/xdp/xsk.c       |  81 ++++++++++++++++++++-
 net/xdp/xsk_queue.h | 206 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 304 insertions(+), 1 deletion(-)

diff --git a/net/xdp/xdp_umem.h b/net/xdp/xdp_umem.h
index ad041b911b38..5e7105b7760b 100644
--- a/net/xdp/xdp_umem.h
+++ b/net/xdp/xdp_umem.h
@@ -36,6 +36,24 @@ struct xdp_umem {
 	struct user_struct *user;
 };
 
+static inline char *xdp_umem_get_data(struct xdp_umem *umem, u32 idx)
+{
+	u64 pg, off;
+	char *data;
+
+	pg = idx >> umem->nfpplog2;
+	off = (idx - (pg << umem->nfpplog2)) << umem->frame_size_log2;
+
+	data = page_address(umem->pgs[pg]);
+	return data + off;
+}
+
+static inline char *xdp_umem_get_data_with_headroom(struct xdp_umem *umem,
+						    u32 idx)
+{
+	return xdp_umem_get_data(umem, idx) + umem->frame_headroom;
+}
+
 bool xdp_umem_validate_queues(struct xdp_umem *umem);
 int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr);
 void xdp_get_umem(struct xdp_umem *umem);
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index d99a1b830f94..a60b1fcfb2b3 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -35,10 +35,14 @@
 #include "xsk_queue.h"
 #include "xdp_umem.h"
 
+#define RX_BATCH_SIZE 16
+
 struct xdp_sock {
 	/* struct sock must be the first member of struct xdp_sock */
 	struct sock sk;
 	struct xsk_queue *rx;
+	struct xskq_iter rx_it;
+	u64 rx_dropped;
 	struct net_device *dev;
 	/* Protects multiple processes in the control path */
 	struct mutex mutex;
@@ -52,6 +56,74 @@ static struct xdp_sock *xdp_sk(struct sock *sk)
 	return (struct xdp_sock *)sk;
 }
 
+static inline int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
+{
+	u32 len = xdp->data_end - xdp->data;
+	void *buffer;
+	int err = 0;
+	u32 id;
+
+	if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index)
+		return -EINVAL;
+
+	if (!xskq_next_frame_deq(xs->umem->fq, &xs->rx_it, RX_BATCH_SIZE))
+		return -ENOSPC;
+
+	id = xdp_umem_get_id(xs->umem->fq, &xs->rx_it);
+	buffer = xdp_umem_get_data_with_headroom(xs->umem, id);
+	memcpy(buffer, xdp->data, len);
+	err = xskq_rxtx_enq_frame(xs->rx, id, len, xs->umem->frame_headroom);
+	if (err)
+		xskq_deq_return_frame(&xs->rx_it);
+
+	return err;
+}
+
+int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
+{
+	int err;
+
+	err = __xsk_rcv(xs, xdp);
+	if (!err)
+		page_frag_free(xdp->data);
+	else
+		xs->rx_dropped++;
+
+	return err;
+}
+
+void xsk_flush(struct xdp_sock *xs)
+{
+	xskq_enq_flush(xs->rx);
+	xs->sk.sk_data_ready(&xs->sk);
+}
+
+int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
+{
+	int err;
+
+	err = __xsk_rcv(xs, xdp);
+	if (!err)
+		xsk_flush(xs);
+	else
+		xs->rx_dropped++;
+
+	return err;
+}
+
+static unsigned int xsk_poll(struct file *file, struct socket *sock,
+			     struct poll_table_struct *wait)
+{
+	unsigned int mask = datagram_poll(file, sock, wait);
+	struct sock *sk = sock->sk;
+	struct xdp_sock *xs = xdp_sk(sk);
+
+	if (xs->rx && !xskq_empty(xs->rx))
+		mask |= POLLIN | POLLRDNORM;
+
+	return mask;
+}
+
 static int xsk_init_queue(u32 entries, struct xsk_queue **queue,
 			  bool umem_queue)
 {
@@ -190,6 +262,9 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
 	} else if (!xs->umem || !xdp_umem_validate_queues(xs->umem)) {
 		err = -EINVAL;
 		goto out_unlock;
+	} else {
+		/* This xsk has its own umem. */
+		xskq_set_umem(xs->umem->fq, &xs->umem->props);
 	}
 
 	/* Rebind? */
@@ -204,6 +279,10 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
 	xs->ifindex = sxdp->sxdp_ifindex;
 	xs->queue_id = sxdp->sxdp_queue_id;
 
+	xskq_init_iter(&xs->rx_it);
+
+	xskq_set_umem(xs->rx, &xs->umem->props);
+
 out_unlock:
 	if (err)
 		dev_put(dev);
@@ -340,7 +419,7 @@ static const struct proto_ops xsk_proto_ops = {
 	.socketpair	= sock_no_socketpair,
 	.accept		= sock_no_accept,
 	.getname	= sock_no_getname,
-	.poll		= sock_no_poll,
+	.poll		= xsk_poll,
 	.ioctl		= sock_no_ioctl,
 	.listen		= sock_no_listen,
 	.shutdown	= sock_no_shutdown,
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index d79b613a9e0a..af6e651f1207 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -37,6 +37,187 @@ struct xsk_queue {
 	u64 invalid_descs;
 };
 
+struct xskq_iter {
+	u32 head;
+	u32 tail;
+	struct xdp_desc desc_copy;
+};
+
+/* Common functions operating for both RXTX and umem queues */
+
+static inline bool xskq_is_valid_rx_entry(struct xsk_queue *q,
+					  u32 idx)
+{
+	if (unlikely(idx >= q->umem_props->nframes)) {
+		q->invalid_descs++;
+		return false;
+	}
+	return true;
+}
+
+static inline bool xskq_is_valid_tx_entry(struct xsk_queue *q,
+					  struct xdp_desc *d)
+{
+	u32 buff_len;
+
+	if (unlikely(d->idx >= q->umem_props->nframes)) {
+		q->invalid_descs++;
+		return false;
+	}
+
+	buff_len = q->umem_props->frame_size;
+	if (unlikely(d->len > buff_len || d->len == 0 ||
+		     d->offset > buff_len || d->offset + d->len > buff_len)) {
+		q->invalid_descs++;
+		return false;
+	}
+
+	return true;
+}
+
+static inline u32 xskq_nb_free(struct xsk_queue *q, u32 head_idx, u32 dcnt)
+{
+	u32 free_entries = q->nentries - (head_idx - q->cached_tail);
+
+	if (free_entries >= dcnt)
+		return free_entries;
+
+	/* Refresh the local tail pointer */
+	q->cached_tail = READ_ONCE(q->ring->tail_idx);
+	return q->nentries - (head_idx - q->cached_tail);
+}
+
+static inline u32 xskq_nb_avail(struct xsk_queue *q, u32 dcnt)
+{
+	u32 entries = q->cached_head - q->cached_tail;
+
+	if (entries == 0)
+		/* Refresh the local head pointer */
+		q->cached_head = READ_ONCE(q->ring->head_idx);
+
+	entries = q->cached_head - q->cached_tail;
+	return (entries > dcnt) ? dcnt : entries;
+}
+
+static inline bool xskq_empty(struct xsk_queue *q)
+{
+	if (xskq_nb_free(q, q->cached_head, 1) == q->nentries)
+		return true;
+	return false;
+}
+
+static inline bool xskq_full(struct xsk_queue *q)
+{
+	if (xskq_nb_avail(q, q->nentries) == q->nentries)
+		return true;
+	return false;
+}
+
+static inline void xskq_init_iter(struct xskq_iter *it)
+{
+	it->head = 0;
+	it->tail = 0;
+}
+
+static inline void xskq_set_umem(struct xsk_queue *q,
+				 struct xdp_umem_props *umem_props)
+{
+	q->umem_props = umem_props;
+}
+
+static inline bool xskq_iter_end(struct xskq_iter *it)
+{
+	return it->tail == it->head;
+}
+
+static inline void xskq_iter_validate(struct xsk_queue *q,
+				      struct xskq_iter *it)
+{
+	while (!xskq_iter_end(it)) {
+		unsigned int idx = it->tail & q->ring_mask;
+
+		if (q->validation == XSK_VALIDATION_TX) {
+			struct xdp_rxtx_queue *ring =
+				(struct xdp_rxtx_queue *)q->ring;
+
+			it->desc_copy.idx = ring->desc[idx].idx;
+			it->desc_copy.len = ring->desc[idx].len;
+			it->desc_copy.offset = ring->desc[idx].offset;
+
+			if (xskq_is_valid_tx_entry(q, &it->desc_copy))
+				break;
+		} else {
+			/* XSK_VALIDATION_RX */
+			struct xdp_umem_queue *ring =
+				(struct xdp_umem_queue *)q->ring;
+
+			if (xskq_is_valid_rx_entry(q, ring->desc[idx]))
+				break;
+		}
+
+		it->tail++;
+	}
+}
+
+static inline void xskq_deq_iter(struct xsk_queue *q,
+				 struct xskq_iter *it, int cnt)
+{
+	it->tail = q->cached_tail;
+	it->head = q->cached_tail + xskq_nb_avail(q, cnt);
+
+	/* Order tail and data */
+	smp_rmb();
+
+	xskq_iter_validate(q, it);
+}
+
+static inline void xskq_deq_iter_next(struct xsk_queue *q,
+				      struct xskq_iter *it)
+{
+	it->tail++;
+	xskq_iter_validate(q, it);
+}
+
+static inline void xskq_deq_iter_done(struct xsk_queue *q,
+				      struct xskq_iter *it)
+{
+	q->cached_tail = it->tail;
+	WRITE_ONCE(q->ring->tail_idx, it->tail);
+}
+
+static inline u64 xskq_nb_invalid_descs(struct xsk_queue *q)
+{
+	return q ? q->invalid_descs : 0;
+}
+
+static inline bool xskq_next_frame_deq(struct xsk_queue *q,
+				       struct xskq_iter *it,
+				       u32 batch_size)
+{
+	if (xskq_iter_end(it)) {
+		xskq_deq_iter_done(q, it);
+		xskq_deq_iter(q, it, batch_size);
+		return !xskq_iter_end(it);
+	}
+
+	xskq_deq_iter_next(q, it);
+	return !xskq_iter_end(it);
+}
+
+static inline void xskq_deq_return_frame(struct xskq_iter *it)
+{
+	it->tail--;
+}
+
+static inline void xskq_enq_flush(struct xsk_queue *q)
+{
+	/* Order flags and data */
+	smp_wmb();
+
+	WRITE_ONCE(q->ring->head_idx, q->iter_head_idx);
+	q->cached_head = q->iter_head_idx;
+}
+
 /* Functions operating on RXTX queues only */
 
 static inline u32 xskq_rxtx_get_ring_size(struct xsk_queue *q)
@@ -45,6 +226,23 @@ static inline u32 xskq_rxtx_get_ring_size(struct xsk_queue *q)
 		q->nentries * sizeof(struct xdp_desc));
 }
 
+static inline int xskq_rxtx_enq_frame(struct xsk_queue *q,
+				      u32 id, u32 len, u16 offset)
+{
+	struct xdp_rxtx_queue *ring = (struct xdp_rxtx_queue *)q->ring;
+	unsigned int idx;
+
+	if (xskq_nb_free(q, q->iter_head_idx, 1) == 0)
+		return -ENOSPC;
+
+	idx = (q->iter_head_idx++) & q->ring_mask;
+	ring->desc[idx].idx = id;
+	ring->desc[idx].len = len;
+	ring->desc[idx].offset = offset;
+
+	return 0;
+}
+
 /* Functions operating on UMEM queues only */
 
 static inline u32 xskq_umem_get_ring_size(struct xsk_queue *q)
@@ -52,6 +250,14 @@ static inline u32 xskq_umem_get_ring_size(struct xsk_queue *q)
 	return sizeof(struct xdp_umem_queue) +
 	       q->nentries * sizeof(u32);
 }
 
+static inline u32 xdp_umem_get_id(struct xsk_queue *q,
+				  struct xskq_iter *it)
+{
+	struct xdp_umem_queue *ring = (struct xdp_umem_queue *)q->ring;
+
+	return ring->desc[it->tail & q->ring_mask];
+}
+
 struct xsk_queue *xskq_create(u32 nentries, bool umem_queue);
 void xskq_destroy(struct xsk_queue *q_ops);
 
-- 
2.14.1