Add support for the DPDK Rx interrupt mechanism, enabling
power-aware applications (e.g. l3fwd-power) to sleep until
packets arrive rather than busy-polling.

Each Rx queue creates an eventfd during queue setup and registers
it with its io_uring instance via io_uring_register_eventfd().
When the kernel posts a CQE (completing a read, i.e. a packet
arrived), it signals the eventfd. These per-queue eventfds are
wired into a VDEV interrupt handle during dev_start when the
application has set intr_conf.rxq.
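
For reference, the underlying liburing pattern in isolation (a
standalone sketch, not the driver code; the helper name and entry
count are illustrative):

  #include <liburing.h>
  #include <sys/eventfd.h>
  #include <unistd.h>

  /* Create a ring whose CQE completions signal an eventfd. */
  static int
  ring_with_eventfd(struct io_uring *ring, unsigned int entries)
  {
          int efd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);

          if (efd < 0)
                  return -1;
          if (io_uring_queue_init(entries, ring, 0) < 0) {
                  close(efd);
                  return -1;
          }
          if (io_uring_register_eventfd(ring, efd) < 0) {
                  io_uring_queue_exit(ring);
                  close(efd);
                  return -1;
          }
          /* Every CQE the kernel posts now increments the eventfd
           * counter, so a poll()/epoll() waiter on efd wakes when a
           * posted read completes.  Reading the 8-byte counter
           * drains it and re-arms the next wakeup. */
          return efd;
  }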

The rx_queue_intr_enable op drains the eventfd counter to re-arm
notification; the disable op is a no-op since the application simply
stops polling. The eventfd is created unconditionally at queue setup
so it is available if the application enables Rx interrupts later.
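
As an illustration of how an application consumes this (the pattern
l3fwd-power follows), a minimal sketch using placeholder port/queue
values and burst size; it relies only on the generic
rte_eth_dev_rx_intr_* and rte_epoll_wait() APIs:

  #include <rte_ethdev.h>
  #include <rte_interrupts.h>
  #include <rte_mbuf.h>

  static void
  rx_loop(uint16_t port_id, uint16_t queue_id)
  {
          struct rte_mbuf *pkts[32];      /* placeholder burst size */
          struct rte_epoll_event ev;

          /* Add this queue's interrupt fd to the per-lcore epoll set */
          rte_eth_dev_rx_intr_ctl_q(port_id, queue_id,
                                    RTE_EPOLL_PER_THREAD,
                                    RTE_INTR_EVENT_ADD, NULL);

          for (;;) {
                  uint16_t n = rte_eth_rx_burst(port_id, queue_id, pkts, 32);

                  if (n > 0) {
                          /* ... process and free the mbufs ... */
                          continue;
                  }
                  /* Idle: re-arm (drains the eventfd in this PMD) and
                   * sleep until io_uring signals the queue's eventfd. */
                  rte_eth_dev_rx_intr_enable(port_id, queue_id);
                  rte_epoll_wait(RTE_EPOLL_PER_THREAD, &ev, 1, -1);
                  rte_eth_dev_rx_intr_disable(port_id, queue_id);
          }
  }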

The Rx interrupt handle is kept separate from the existing LSC
netlink interrupt handle to avoid coupling the two mechanisms.

Signed-off-by: Stephen Hemminger <[email protected]>
---
 doc/guides/nics/features/rtap.ini |   1 +
 drivers/net/rtap/rtap.h           |   6 ++
 drivers/net/rtap/rtap_ethdev.c    |  16 ++++
 drivers/net/rtap/rtap_intr.c      | 120 ++++++++++++++++++++++++++++++
 drivers/net/rtap/rtap_rxtx.c      |  31 +++++++-
 5 files changed, 173 insertions(+), 1 deletion(-)

diff --git a/doc/guides/nics/features/rtap.ini b/doc/guides/nics/features/rtap.ini
index fe0c88a8fc..48fe3f1b33 100644
--- a/doc/guides/nics/features/rtap.ini
+++ b/doc/guides/nics/features/rtap.ini
@@ -6,6 +6,7 @@
 [Features]
 Link status          = Y
 Link status event    = Y
+Rx interrupt         = Y
 MTU update           = Y
 Promiscuous mode     = Y
 Allmulticast mode    = Y
diff --git a/drivers/net/rtap/rtap.h b/drivers/net/rtap/rtap.h
index f73b5e317d..f37cac87ad 100644
--- a/drivers/net/rtap/rtap.h
+++ b/drivers/net/rtap/rtap.h
@@ -42,6 +42,7 @@ extern int rtap_logtype;
 struct rtap_rx_queue {
        struct rte_mempool *mb_pool;    /* rx buffer pool */
        struct io_uring io_ring;        /* queue of posted read's */
+       int intr_fd;                    /* eventfd for Rx interrupt */
        uint16_t port_id;
        uint16_t queue_id;
 
@@ -64,6 +65,7 @@ struct rtap_tx_queue {
 struct rtap_pmd {
        int keep_fd;                    /* keep alive file descriptor */
        struct rte_intr_handle *intr_handle; /* LSC interrupt handle */
+       struct rte_intr_handle *rx_intr_handle; /* Rx queue interrupt handle */
        char ifname[IFNAMSIZ];          /* name assigned by kernel */
        struct rte_ether_addr eth_addr; /* address assigned by kernel */
 
@@ -90,5 +92,9 @@ void rtap_tx_queue_release(struct rte_eth_dev *dev, uint16_t queue_id);
 
 /* rtap_intr.c */
 int rtap_lsc_set(struct rte_eth_dev *dev, int set);
+int rtap_rx_intr_vec_install(struct rte_eth_dev *dev);
+void rtap_rx_intr_vec_uninstall(struct rte_eth_dev *dev);
+int rtap_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id);
+int rtap_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id);
 
 #endif /* _RTAP_H_ */
diff --git a/drivers/net/rtap/rtap_ethdev.c b/drivers/net/rtap/rtap_ethdev.c
index 9b8ad1f452..8a9fb5be85 100644
--- a/drivers/net/rtap/rtap_ethdev.c
+++ b/drivers/net/rtap/rtap_ethdev.c
@@ -293,8 +293,18 @@ rtap_dev_start(struct rte_eth_dev *dev)
        if (ret != 0)
                return ret;
 
+       /* Install Rx interrupt vector if requested by application */
+       if (dev->data->dev_conf.intr_conf.rxq) {
+               ret = rtap_rx_intr_vec_install(dev);
+               if (ret != 0) {
+                       rtap_lsc_set(dev, 0);
+                       return ret;
+               }
+       }
+
        ret = rtap_set_link_up(dev);
        if (ret != 0) {
+               rtap_rx_intr_vec_uninstall(dev);
                rtap_lsc_set(dev, 0);
                return ret;
        }
@@ -315,6 +325,7 @@ rtap_dev_stop(struct rte_eth_dev *dev)
 
        dev->data->dev_link.link_status = RTE_ETH_LINK_DOWN;
 
+       rtap_rx_intr_vec_uninstall(dev);
        rtap_lsc_set(dev, 0);
        rtap_set_link_down(dev);
 
@@ -527,6 +538,9 @@ rtap_dev_close(struct rte_eth_dev *dev)
                        pmd->keep_fd = -1;
                }
 
+               rte_intr_instance_free(pmd->rx_intr_handle);
+               pmd->rx_intr_handle = NULL;
+
                rte_intr_instance_free(pmd->intr_handle);
                pmd->intr_handle = NULL;
        }
@@ -597,6 +611,8 @@ static const struct eth_dev_ops rtap_ops = {
        .rx_queue_release       = rtap_rx_queue_release,
        .tx_queue_setup         = rtap_tx_queue_setup,
        .tx_queue_release       = rtap_tx_queue_release,
+       .rx_queue_intr_enable   = rtap_rx_queue_intr_enable,
+       .rx_queue_intr_disable  = rtap_rx_queue_intr_disable,
 };
 
 static int
diff --git a/drivers/net/rtap/rtap_intr.c b/drivers/net/rtap/rtap_intr.c
index 8a27b811e1..231666efae 100644
--- a/drivers/net/rtap/rtap_intr.c
+++ b/drivers/net/rtap/rtap_intr.c
@@ -145,3 +145,123 @@ rtap_lsc_set(struct rte_eth_dev *dev, int set)
 
        return 0;
 }
+
+/*
+ * Install per-queue Rx interrupt vector.
+ *
+ * Each Rx queue has an eventfd registered with its io_uring instance.
+ * When a CQE is posted (packet received), the kernel signals the eventfd.
+ * This function wires those eventfds into an rte_intr_handle so that
+ * DPDK's interrupt framework (rte_epoll_wait) can poll them.
+ *
+ * Only called when dev_conf.intr_conf.rxq is set.
+ */
+int
+rtap_rx_intr_vec_install(struct rte_eth_dev *dev)
+{
+       struct rtap_pmd *pmd = dev->data->dev_private;
+       uint16_t nb_rx = dev->data->nb_rx_queues;
+
+       if (pmd->rx_intr_handle != NULL) {
+               PMD_LOG(DEBUG, "Rx interrupt vector already installed");
+               return 0;
+       }
+
+       pmd->rx_intr_handle = rte_intr_instance_alloc(RTE_INTR_INSTANCE_F_PRIVATE);
+       if (pmd->rx_intr_handle == NULL) {
+               PMD_LOG(ERR, "Failed to allocate Rx intr handle");
+               return -ENOMEM;
+       }
+
+       if (rte_intr_type_set(pmd->rx_intr_handle, RTE_INTR_HANDLE_VDEV) < 0)
+               goto error;
+
+       if (rte_intr_nb_efd_set(pmd->rx_intr_handle, nb_rx) < 0)
+               goto error;
+
+       if (rte_intr_max_intr_set(pmd->rx_intr_handle, nb_rx + 1) < 0)
+               goto error;
+
+       for (uint16_t i = 0; i < nb_rx; i++) {
+               struct rtap_rx_queue *rxq = dev->data->rx_queues[i];
+
+               if (rxq == NULL || rxq->intr_fd < 0) {
+                       PMD_LOG(ERR, "Rx queue %u not ready for interrupts", i);
+                       goto error;
+               }
+
+               if (rte_intr_efds_index_set(pmd->rx_intr_handle, i,
+                                           rxq->intr_fd) < 0) {
+                       PMD_LOG(ERR, "Failed to set efd for queue %u", i);
+                       goto error;
+               }
+       }
+
+       dev->intr_handle = pmd->rx_intr_handle;
+       PMD_LOG(DEBUG, "Rx interrupt vector installed for %u queues", nb_rx);
+       return 0;
+
+error:
+       rte_intr_instance_free(pmd->rx_intr_handle);
+       pmd->rx_intr_handle = NULL;
+       return -1;
+}
+
+/*
+ * Remove per-queue Rx interrupt vector.
+ * Restores dev->intr_handle to the LSC handle.
+ */
+void
+rtap_rx_intr_vec_uninstall(struct rte_eth_dev *dev)
+{
+       struct rtap_pmd *pmd = dev->data->dev_private;
+
+       if (pmd->rx_intr_handle == NULL)
+               return;
+
+       /* Restore LSC handle as device interrupt handle */
+       dev->intr_handle = pmd->intr_handle;
+
+       rte_intr_instance_free(pmd->rx_intr_handle);
+       pmd->rx_intr_handle = NULL;
+
+       PMD_LOG(DEBUG, "Rx interrupt vector uninstalled");
+}
+
+/*
+ * Enable Rx interrupt for a queue.
+ *
+ * Drain any pending eventfd notification so the next CQE
+ * triggers a fresh wakeup in rte_epoll_wait().
+ */
+int
+rtap_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id)
+{
+       struct rtap_rx_queue *rxq = dev->data->rx_queues[queue_id];
+       uint64_t val;
+
+       if (rxq == NULL || rxq->intr_fd < 0)
+               return -EINVAL;
+
+       /* Drain the eventfd counter to re-arm notification */
+       if (read(rxq->intr_fd, &val, sizeof(val)) < 0 && errno != EAGAIN) {
+               PMD_LOG(ERR, "eventfd drain failed queue %u: %s",
+                       queue_id, strerror(errno));
+               return -errno;
+       }
+
+       return 0;
+}
+
+/*
+ * Disable Rx interrupt for a queue.
+ *
+ * Nothing to do - the eventfd stays registered with io_uring
+ * but the application simply stops polling it.
+ */
+int
+rtap_rx_queue_intr_disable(struct rte_eth_dev *dev __rte_unused,
+                          uint16_t queue_id __rte_unused)
+{
+       return 0;
+}
diff --git a/drivers/net/rtap/rtap_rxtx.c b/drivers/net/rtap/rtap_rxtx.c
index c972ab4ca0..87d181eded 100644
--- a/drivers/net/rtap/rtap_rxtx.c
+++ b/drivers/net/rtap/rtap_rxtx.c
@@ -8,6 +8,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
+#include <sys/eventfd.h>
 #include <liburing.h>
 #include <sys/uio.h>
 #include <linux/virtio_net.h>
@@ -369,6 +370,7 @@ rtap_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_id, uint16_t nb_rx_d
        rxq->mb_pool = mb_pool;
        rxq->port_id = dev->data->port_id;
        rxq->queue_id = queue_id;
+       rxq->intr_fd = -1;
        dev->data->rx_queues[queue_id] = rxq;
 
        if (io_uring_queue_init(nb_rx_desc, &rxq->io_ring, 0) != 0) {
@@ -376,10 +378,26 @@ rtap_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_id, uint16_t nb_rx_d
                goto error_rxq_free;
        }
 
+       /*
+        * Create an eventfd for Rx interrupt notification.
+        * io_uring will signal this fd whenever a CQE is posted,
+        * enabling power-aware applications to sleep until packets arrive.
+        */
+       rxq->intr_fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
+       if (rxq->intr_fd < 0) {
+               PMD_LOG(ERR, "eventfd failed: %s", strerror(errno));
+               goto error_iouring_exit;
+       }
+
+       if (io_uring_register_eventfd(&rxq->io_ring, rxq->intr_fd) < 0) {
+               PMD_LOG(ERR, "io_uring_register_eventfd failed: %s", 
strerror(errno));
+               goto error_eventfd_close;
+       }
+
        mbufs = calloc(nb_rx_desc, sizeof(struct rte_mbuf *));
        if (mbufs == NULL) {
                PMD_LOG(ERR, "Rx mbuf pointer alloc failed");
-               goto error_iouring_exit;
+               goto error_eventfd_close;
        }
 
        /* open shared tap fd maybe already setup */
@@ -429,6 +447,11 @@ rtap_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_id, uint16_t nb_rx_d
        }
        rtap_queue_close(dev, queue_id);
        free(mbufs);
+error_eventfd_close:
+       if (rxq->intr_fd >= 0) {
+               close(rxq->intr_fd);
+               rxq->intr_fd = -1;
+       }
 error_iouring_exit:
        io_uring_queue_exit(&rxq->io_ring);
 error_rxq_free:
@@ -503,6 +526,12 @@ rtap_rx_queue_release(struct rte_eth_dev *dev, uint16_t queue_id)
        if (rxq == NULL)
                return;
 
+       if (rxq->intr_fd >= 0) {
+               io_uring_unregister_eventfd(&rxq->io_ring);
+               close(rxq->intr_fd);
+               rxq->intr_fd = -1;
+       }
+
        rtap_cancel_all(&rxq->io_ring);
        io_uring_queue_exit(&rxq->io_ring);
 
-- 
2.51.0
