Add support for an optional stats struct embedded in the refill queue
region, allowing userspace to monitor copy-fallback events in real time.

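Userspace queries the stats struct size and the required offset alignment
via IO_URING_QUERY_ZCRX_NOTIF (notif_stats_size /
notif_stats_off_alignment), then sets ZCRX_NOTIF_DESC_FLAG_STATS in
zcrx_notification_desc.flags and provides a stats_offset pointing to a
location within the refill queue region. The offset must honour the
reported alignment, lie past the end of the rqes array, and leave room
for the whole stats struct inside the region.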

The kernel zeroes the stats counters at registration and then updates
them in place on every copy-fallback event.

Signed-off-by: Clément Léger <[email protected]>
---
 include/uapi/linux/io_uring/query.h | 12 +++++++
 include/uapi/linux/io_uring/zcrx.h  | 15 ++++++--
 io_uring/query.c                    | 16 +++++++++
 io_uring/zcrx.c                     | 54 +++++++++++++++++++++++++++--
 io_uring/zcrx.h                     |  1 +
 5 files changed, 94 insertions(+), 4 deletions(-)

diff --git a/include/uapi/linux/io_uring/query.h b/include/uapi/linux/io_uring/query.h
index 95500759cc13..1a68eca7c6b4 100644
--- a/include/uapi/linux/io_uring/query.h
+++ b/include/uapi/linux/io_uring/query.h
@@ -23,6 +23,7 @@ enum {
        IO_URING_QUERY_OPCODES                  = 0,
        IO_URING_QUERY_ZCRX                     = 1,
        IO_URING_QUERY_SCQ                      = 2,
+       IO_URING_QUERY_ZCRX_NOTIF               = 3,
 
        __IO_URING_QUERY_MAX,
 };
@@ -62,6 +63,17 @@ struct io_uring_query_zcrx {
        __u64 __resv2;
 };
 
+struct io_uring_query_zcrx_notif {
+       /* Bitmask of supported ZCRX_NOTIF_* flags */
+       __u32 notif_flags;
+       /* Size of io_uring_zcrx_notif_stats */
+       __u32 notif_stats_size;
+       /* Required alignment for the stats struct within the region (ie stats_offset) */
+       __u32 notif_stats_off_alignment;
+       __u32 __resv1;
+       __u64 __resv2[4];
+};
+
 struct io_uring_query_scq {
        /* The SQ/CQ rings header size */
        __u64 hdr_size;
diff --git a/include/uapi/linux/io_uring/zcrx.h b/include/uapi/linux/io_uring/zcrx.h
index 3f7b72b09878..384e185a180c 100644
--- a/include/uapi/linux/io_uring/zcrx.h
+++ b/include/uapi/linux/io_uring/zcrx.h
@@ -75,11 +75,22 @@ enum zcrx_notification_type {
        __ZCRX_NOTIF_TYPE_LAST,
 };
 
+enum zcrx_notification_desc_flags {
+       /* If set, stats_offset holds a valid offset to a notif_stats struct */
+       ZCRX_NOTIF_DESC_FLAG_STATS = 1 << 0,
+};
+
+struct io_uring_zcrx_notif_stats {
+       __u64   copy_count;     /* cumulative copy-fallback CQEs */
+       __u64   copy_bytes;     /* cumulative bytes copied */
+};
+
 struct zcrx_notification_desc {
        __u64   user_data;
        __u32   type_mask;
-       __u32   __resv1;
-       __u64   __resv2[10];
+       __u32   flags; /* see enum zcrx_notification_desc_flags */
+       __u64   stats_offset; /* offset from the beginning of refill ring region for stats */
+       __u64   __resv2[9];
 };
 
 /*
diff --git a/io_uring/query.c b/io_uring/query.c
index c1704d088374..d17a83645bcd 100644
--- a/io_uring/query.c
+++ b/io_uring/query.c
@@ -9,6 +9,7 @@
 union io_query_data {
        struct io_uring_query_opcode opcodes;
        struct io_uring_query_zcrx zcrx;
+       struct io_uring_query_zcrx_notif zcrx_notif;
        struct io_uring_query_scq scq;
 };
 
@@ -44,6 +45,18 @@ static ssize_t io_query_zcrx(union io_query_data *data)
        return sizeof(*e);
 }
 
+static ssize_t io_query_zcrx_notif(union io_query_data *data)
+{
+       struct io_uring_query_zcrx_notif *e = &data->zcrx_notif;
+
+       e->notif_flags = ZCRX_NOTIF_TYPE_MASK;
+       e->notif_stats_size = sizeof(struct io_uring_zcrx_notif_stats);
+       e->notif_stats_off_alignment = __alignof__(struct io_uring_zcrx_notif_stats);
+       e->__resv1 = 0;
+       memset(&e->__resv2, 0, sizeof(e->__resv2));
+       return sizeof(*e);
+}
+
 static ssize_t io_query_scq(union io_query_data *data)
 {
        struct io_uring_query_scq *e = &data->scq;
@@ -83,6 +96,9 @@ static int io_handle_query_entry(union io_query_data *data, void __user *uhdr,
        case IO_URING_QUERY_ZCRX:
                ret = io_query_zcrx(data);
                break;
+       case IO_URING_QUERY_ZCRX_NOTIF:
+               ret = io_query_zcrx_notif(data);
+               break;
        case IO_URING_QUERY_SCQ:
                ret = io_query_scq(data);
                break;
diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index f31f2ca0f7ec..2881ad76bacc 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -415,6 +415,7 @@ static void io_free_rbuf_ring(struct io_zcrx_ifq *ifq)
        io_free_region(ifq->user, &ifq->rq_region);
        ifq->rq.ring = IO_URING_PTR_POISON;
        ifq->rq.rqes = IO_URING_PTR_POISON;
+       ifq->notif_stats = IO_URING_PTR_POISON;
 }
 
 static void io_zcrx_free_area(struct io_zcrx_ifq *ifq,
@@ -855,6 +856,33 @@ static int zcrx_register_netdev(struct io_zcrx_ifq *ifq,
        return ret;
 }
 
+static int zcrx_validate_notif_stats(struct io_zcrx_ifq *ifq,
+                                    const struct io_uring_zcrx_ifq_reg *reg,
+                                    const struct zcrx_notification_desc *notif)
+{
+       size_t stats_off = notif->stats_offset;
+       size_t used, end;
+
+       used = reg->offsets.rqes +
+              sizeof(struct io_uring_zcrx_rqe) * reg->rq_entries;
+
+       if (!IS_ALIGNED(stats_off, __alignof__(struct io_uring_zcrx_notif_stats)))
+               return -EINVAL;
+       if (stats_off < used)
+               return -ERANGE;
+       if (check_add_overflow(stats_off,
+                              sizeof(struct io_uring_zcrx_notif_stats),
+                              &end))
+               return -ERANGE;
+       if (end > io_region_size(&ifq->rq_region))
+               return -ERANGE;
+
+       ifq->notif_stats = io_region_get_ptr(&ifq->rq_region) + stats_off;
+       memset(ifq->notif_stats, 0, sizeof(*ifq->notif_stats));
+
+       return 0;
+}
+
 int io_register_zcrx(struct io_ring_ctx *ctx,
                     struct io_uring_zcrx_ifq_reg __user *arg)
 {
@@ -908,7 +936,13 @@ int io_register_zcrx(struct io_ring_ctx *ctx,
                return -EFAULT;
        if (notif.type_mask & ~ZCRX_NOTIF_TYPE_MASK)
                return -EINVAL;
-       if (notif.__resv1 || !mem_is_zero(&notif.__resv2, sizeof(notif.__resv2)))
+       if (notif.flags & ~ZCRX_NOTIF_DESC_FLAG_STATS)
+               return -EINVAL;
+       if (!(notif.flags & ZCRX_NOTIF_DESC_FLAG_STATS)) {
+               if (notif.stats_offset)
+                       return -EINVAL;
+       }
+       if (!mem_is_zero(&notif.__resv2, sizeof(notif.__resv2)))
                return -EINVAL;
 
        ifq = io_zcrx_ifq_alloc(ctx);
@@ -939,6 +973,12 @@ int io_register_zcrx(struct io_ring_ctx *ctx,
        if (ret)
                goto err;
 
+       if (notif.flags & ZCRX_NOTIF_DESC_FLAG_STATS) {
+               ret = zcrx_validate_notif_stats(ifq, &reg, &notif);
+               if (ret)
+                       goto err;
+       }
+
        ifq->kern_readable = !(area.flags & IORING_ZCRX_AREA_DMABUF);
 
        if (!(reg.flags & ZCRX_REG_NODEV)) {
@@ -1154,6 +1194,11 @@ static void zcrx_notif_tw(struct io_tw_req tw_req, io_tw_token_t tw)
        kmem_cache_free(req_cachep, req);
 }
 
+static void zcrx_stat_add(__u64 *p, s64 v)
+{
+       WRITE_ONCE(*p, READ_ONCE(*p) + v);
+}
+
 static void zcrx_send_notif(struct io_zcrx_ifq *ifq, unsigned type)
 {
        gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN | __GFP_ZERO;
@@ -1537,8 +1582,13 @@ static int io_zcrx_copy_frag(struct io_kiocb *req, struct io_zcrx_ifq *ifq,
        int ret;
 
        ret = io_zcrx_copy_chunk(req, ifq, page, off + skb_frag_off(frag), len);
-       if (ret > 0)
+       if (ret > 0) {
+               if (ifq->notif_stats) {
+                       zcrx_stat_add(&ifq->notif_stats->copy_count, 1);
+                       zcrx_stat_add(&ifq->notif_stats->copy_bytes, ret);
+               }
                zcrx_send_notif(ifq, ZCRX_NOTIF_COPY);
+       }
 
        return ret;
 }
diff --git a/io_uring/zcrx.h b/io_uring/zcrx.h
index 203b3049e14b..e1aab76c310d 100644
--- a/io_uring/zcrx.h
+++ b/io_uring/zcrx.h
@@ -81,6 +81,7 @@ struct io_zcrx_ifq {
        u32                             allowed_notif_mask;
        u32                             fired_notifs;
        u64                             notif_data;
+       struct io_uring_zcrx_notif_stats *notif_stats;
 };
 
 #if defined(CONFIG_IO_URING_ZCRX)
-- 
2.53.0-Meta


Reply via email to