Add link state change (LSC) notification support using a netlink
socket subscribed to RTMGRP_LINK events.
When LSC is enabled in the device configuration, the driver:
- Creates a NETLINK_ROUTE socket filtering for RTM_NEWLINK and
RTM_DELLINK messages
- Registers the socket fd with the EAL interrupt handler
- On interrupt, drains all pending netlink messages and updates
the link status when a message refers to the device's interface index
The interrupt is enabled during dev_start and disabled during
dev_stop, with proper cleanup of the netlink socket and EAL
callback registration.
Signed-off-by: Stephen Hemminger <[email protected]>
---
doc/guides/nics/features/rtap.ini | 1 +
drivers/net/rtap/meson.build | 1 +
drivers/net/rtap/rtap.h | 5 +
drivers/net/rtap/rtap_ethdev.c | 36 +++++++-
drivers/net/rtap/rtap_intr.c | 147 ++++++++++++++++++++++++++++++
5 files changed, 187 insertions(+), 3 deletions(-)
create mode 100644 drivers/net/rtap/rtap_intr.c
diff --git a/doc/guides/nics/features/rtap.ini
b/doc/guides/nics/features/rtap.ini
index b8eaa805fe..36a14e9696 100644
--- a/doc/guides/nics/features/rtap.ini
+++ b/doc/guides/nics/features/rtap.ini
@@ -5,6 +5,7 @@
;
[Features]
Link status = Y
+Link status event = Y
MTU update = Y
Promiscuous mode = Y
Allmulticast mode = Y
diff --git a/drivers/net/rtap/meson.build b/drivers/net/rtap/meson.build
index 8e2b15f382..86d400323c 100644
--- a/drivers/net/rtap/meson.build
+++ b/drivers/net/rtap/meson.build
@@ -19,6 +19,7 @@ endif
sources = files(
'rtap_ethdev.c',
+ 'rtap_intr.c',
'rtap_rxtx.c',
)
diff --git a/drivers/net/rtap/rtap.h b/drivers/net/rtap/rtap.h
index 99f413f001..f73b5e317d 100644
--- a/drivers/net/rtap/rtap.h
+++ b/drivers/net/rtap/rtap.h
@@ -13,6 +13,7 @@
#include <ethdev_driver.h>
#include <rte_ether.h>
+#include <rte_interrupts.h>
#include <rte_log.h>
@@ -62,6 +63,7 @@ struct rtap_tx_queue {
struct rtap_pmd {
int keep_fd; /* keep alive file descriptor */
+ struct rte_intr_handle *intr_handle; /* LSC interrupt handle */
char ifname[IFNAMSIZ]; /* name assigned by kernel */
struct rte_ether_addr eth_addr; /* address assigned by kernel */
@@ -86,4 +88,7 @@ int rtap_tx_queue_setup(struct rte_eth_dev *dev, uint16_t
queue_id,
const struct rte_eth_txconf *tx_conf);
void rtap_tx_queue_release(struct rte_eth_dev *dev, uint16_t queue_id);
+/* rtap_intr.c */
+int rtap_lsc_set(struct rte_eth_dev *dev, int set);
+
#endif /* _RTAP_H_ */
diff --git a/drivers/net/rtap/rtap_ethdev.c b/drivers/net/rtap/rtap_ethdev.c
index 277a280772..8c22021655 100644
--- a/drivers/net/rtap/rtap_ethdev.c
+++ b/drivers/net/rtap/rtap_ethdev.c
@@ -24,6 +24,7 @@
#include <rte_eal.h>
#include <rte_ethdev.h>
#include <rte_ether.h>
+#include <rte_interrupts.h>
#include <rte_kvargs.h>
#include <rte_log.h>
@@ -141,11 +142,16 @@ rtap_change_flags(struct rte_eth_dev *dev, uint32_t
flags, uint32_t mask)
ifr.ifr_flags |= flags;
ret = ioctl(sock, SIOCSIFFLAGS, &ifr);
- if (ret < 0)
+ if (ret < 0) {
PMD_LOG_ERRNO(ERR, "Unable to set flags for %s", ifr.ifr_name);
+ goto error;
+ }
+ close(sock);
+ return 0;
error:
+ ret = -errno;
close(sock);
- return (ret < 0) ? -errno : 0;
+ return ret;
}
static int
@@ -277,11 +283,18 @@ rtap_macaddr_set(struct rte_eth_dev *dev, struct
rte_ether_addr *addr)
static int
rtap_dev_start(struct rte_eth_dev *dev)
{
- int ret = rtap_set_link_up(dev);
+ int ret;
+ ret = rtap_lsc_set(dev, 1);
if (ret != 0)
return ret;
+ ret = rtap_set_link_up(dev);
+ if (ret != 0) {
+ rtap_lsc_set(dev, 0);
+ return ret;
+ }
+
dev->data->dev_link.link_status = RTE_ETH_LINK_UP;
for (uint16_t i = 0; i < dev->data->nb_rx_queues; i++) {
dev->data->rx_queue_state[i] = RTE_ETH_QUEUE_STATE_STARTED;
@@ -298,6 +311,7 @@ rtap_dev_stop(struct rte_eth_dev *dev)
dev->data->dev_link.link_status = RTE_ETH_LINK_DOWN;
+ rtap_lsc_set(dev, 0);
rtap_set_link_down(dev);
for (uint16_t i = 0; i < dev->data->nb_rx_queues; i++) {
@@ -508,6 +522,9 @@ rtap_dev_close(struct rte_eth_dev *dev)
close(pmd->keep_fd);
pmd->keep_fd = -1;
}
+
+ rte_intr_instance_free(pmd->intr_handle);
+ pmd->intr_handle = NULL;
}
free(dev->process_private);
@@ -585,6 +602,17 @@ rtap_create(struct rte_eth_dev *dev, const char *tap_name,
uint8_t persist)
pmd->keep_fd = -1;
pmd->rx_drop_base = 0;
+ /* Allocate interrupt instance for link state change events */
+ pmd->intr_handle = rte_intr_instance_alloc(RTE_INTR_INSTANCE_F_SHARED);
+ if (pmd->intr_handle == NULL) {
+ PMD_LOG(ERR, "Failed to allocate intr handle");
+ goto error;
+ }
+ rte_intr_type_set(pmd->intr_handle, RTE_INTR_HANDLE_EXT);
+ rte_intr_fd_set(pmd->intr_handle, -1);
+ dev->intr_handle = pmd->intr_handle;
+ data->dev_flags |= RTE_ETH_DEV_INTR_LSC;
+
dev->dev_ops = &rtap_ops;
/* Get the initial fd used to keep the tap device around */
@@ -623,6 +651,8 @@ rtap_create(struct rte_eth_dev *dev, const char *tap_name,
uint8_t persist)
error:
if (pmd->keep_fd != -1)
close(pmd->keep_fd);
+ rte_intr_instance_free(pmd->intr_handle);
+ pmd->intr_handle = NULL;
return -1;
}
diff --git a/drivers/net/rtap/rtap_intr.c b/drivers/net/rtap/rtap_intr.c
new file mode 100644
index 0000000000..8a27b811e1
--- /dev/null
+++ b/drivers/net/rtap/rtap_intr.c
@@ -0,0 +1,147 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2026 Stephen Hemminger
+ */
+
+#include <errno.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/socket.h>
+#include <net/if.h>
+#include <linux/rtnetlink.h>
+
+#include <rte_interrupts.h>
+
+#include "rtap.h"
+
+/*
+ * Open a NETLINK_ROUTE socket (non-blocking, close-on-exec) and bind it
+ * to the multicast groups selected by 'groups'.
+ * On success the socket fd is returned; on failure the error is logged
+ * and -1 is returned.
+ */
+static int
+rtap_netlink_init(unsigned int groups)
+{
+	struct sockaddr_nl nl_addr;
+	int nl_fd;
+
+	nl_fd = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC | SOCK_NONBLOCK,
+		       NETLINK_ROUTE);
+	if (nl_fd < 0) {
+		PMD_LOG_ERRNO(ERR, "netlink socket");
+		return -1;
+	}
+
+	/* All fields other than family and groups stay zero */
+	memset(&nl_addr, 0, sizeof(nl_addr));
+	nl_addr.nl_family = AF_NETLINK;
+	nl_addr.nl_groups = groups;
+
+	if (bind(nl_fd, (struct sockaddr *)&nl_addr, sizeof(nl_addr)) >= 0)
+		return nl_fd;
+
+	PMD_LOG_ERRNO(ERR, "netlink bind");
+	close(nl_fd);
+	return -1;
+}
+
+/*
+ * Drain all pending netlink messages from the socket.
+ *
+ * Every RTM_NEWLINK/RTM_DELLINK message whose interface index equals the
+ * index of this device's kernel interface triggers a link status refresh
+ * through rtap_link_update(). All other messages are discarded.
+ *
+ * The loop ends as soon as recv() returns zero or a negative value, so
+ * the caller is expected to pass a non-blocking socket.
+ *
+ * NOTE(review): applications subscribed to RTE_ETH_EVENT_INTR_LSC are only
+ * notified if rtap_link_update() raises the event itself - confirm.
+ */
+static void
+rtap_netlink_recv(int fd, struct rte_eth_dev *dev)
+{
+	struct rtap_pmd *pmd = dev->data->dev_private;
+	unsigned int if_index = if_nametoindex(pmd->ifname);
+	/*
+	 * The union guarantees the receive buffer is suitably aligned for
+	 * struct nlmsghdr; a bare char array gives no such guarantee.
+	 */
+	union {
+		struct nlmsghdr hdr;
+		char data[4096];
+	} buf;
+	ssize_t len;
+
+	while ((len = recv(fd, buf.data, sizeof(buf.data), 0)) > 0) {
+		for (struct nlmsghdr *nh = &buf.hdr;
+		     NLMSG_OK(nh, (unsigned int)len);
+		     nh = NLMSG_NEXT(nh, len)) {
+			const struct ifinfomsg *ifi;
+
+			if (nh->nlmsg_type != RTM_NEWLINK &&
+			    nh->nlmsg_type != RTM_DELLINK)
+				continue;
+
+			/* Skip messages too short to hold an ifinfomsg */
+			if (nh->nlmsg_len < NLMSG_LENGTH(sizeof(*ifi)))
+				continue;
+
+			ifi = NLMSG_DATA(nh);
+			if ((unsigned int)ifi->ifi_index != if_index)
+				continue;
+
+			/* Link state changed for our interface */
+			rtap_link_update(dev, 0);
+		}
+	}
+}
+
+/*
+ * EAL interrupt callback, invoked when the netlink socket is readable.
+ * 'cb_arg' is the ethdev the callback was registered for.
+ */
+static void
+rtap_lsc_handler(void *cb_arg)
+{
+	struct rte_eth_dev *eth_dev = cb_arg;
+	struct rtap_pmd *priv = eth_dev->data->dev_private;
+	const int nl_fd = rte_intr_fd_get(priv->intr_handle);
+
+	/* No valid socket installed in the handle: nothing to read */
+	if (nl_fd < 0)
+		return;
+
+	rtap_netlink_recv(nl_fd, eth_dev);
+}
+
+/*
+ * Create the RTMGRP_LINK netlink socket, store it in the interrupt handle
+ * and register rtap_lsc_handler() with the EAL interrupt handler.
+ *
+ * Returns 0 on success or when already enabled, -1 if the socket could not
+ * be created, or the negative value from rte_intr_callback_register().
+ */
+static int
+rtap_lsc_enable(struct rte_eth_dev *dev)
+{
+	struct rtap_pmd *pmd = dev->data->dev_private;
+	int fd, ret;
+
+	/* Already enabled: opening a second socket would leak the first */
+	if (rte_intr_fd_get(pmd->intr_handle) >= 0)
+		return 0;
+
+	fd = rtap_netlink_init(RTMGRP_LINK);
+	if (fd < 0)
+		return -1;
+
+	rte_intr_fd_set(pmd->intr_handle, fd);
+	ret = rte_intr_callback_register(pmd->intr_handle,
+					 rtap_lsc_handler, dev);
+	if (ret < 0) {
+		PMD_LOG(ERR, "Failed to register LSC callback: %s",
+			rte_strerror(-ret));
+		close(fd);
+		rte_intr_fd_set(pmd->intr_handle, -1);
+		return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * Unregister the LSC callback and close the netlink socket.
+ * Does nothing when no socket is installed in the interrupt handle.
+ */
+static void
+rtap_lsc_disable(struct rte_eth_dev *dev)
+{
+	struct rtap_pmd *pmd = dev->data->dev_private;
+	unsigned int retry = 10;
+	int fd, ret;
+
+	fd = rte_intr_fd_get(pmd->intr_handle);
+	if (fd < 0)
+		return;
+
+	/*
+	 * Unregister is retried (up to 10 times, 100 ms apart) while it
+	 * reports -EAGAIN; presumably the callback is still executing.
+	 * Any other failure, or running out of retries, is logged.
+	 */
+	for (;;) {
+		ret = rte_intr_callback_unregister(pmd->intr_handle,
+						   rtap_lsc_handler, dev);
+		if (ret != -EAGAIN || retry-- == 0)
+			break;
+		rte_delay_ms(100);
+	}
+	if (ret < 0)
+		PMD_LOG(ERR, "LSC callback unregister failed: %d", ret);
+
+	close(fd);
+	rte_intr_fd_set(pmd->intr_handle, -1);
+}
+
+/*
+ * Enable or disable the link state change interrupt.
+ *
+ * The interrupt is only enabled when 'set' is non-zero and LSC is
+ * requested in the device configuration (intr_conf.lsc). In every other
+ * case an active LSC interrupt, if any, is torn down.
+ *
+ * Returns 0 on success (disabling never fails), negative on failure to
+ * enable.
+ */
+int
+rtap_lsc_set(struct rte_eth_dev *dev, int set)
+{
+	if (set && dev->data->dev_conf.intr_conf.lsc)
+		return rtap_lsc_enable(dev);
+
+	rtap_lsc_disable(dev);
+	return 0;
+}
--
2.51.0