On Tue, Feb 24, 2026 at 4:35 AM Koushik Dutta <[email protected]> wrote:
>
> Implement VirtIO Network Notification Coalescing (Bit 53).
> This allows the guest to manage interrupt frequency using ethtool
> -C for both RX and TX paths.
>
> - Added VIRTIO_NET_F_NOTF_COAL to host features.
> - Implemented VIRTIO_NET_CTRL_NOTF_COAL class handling in
> virtio_net_handle_ctrl_iov.
> - Added logic to store and apply rx/tx usecs and max_packets.
> - Added packet counters and threshold logic for both RX and TX data paths.
> - Dynamic Dispatcher: Implemented a dispatcher mechanism that
> dynamically switches/activates the notification callback logic
> only after the guest enables TX coalescing via ethtool.
>
> This reduces interrupt overhead by batching notifications based on
> either a packet count or a time-based threshold.
>
> Signed-off-by: Koushik Dutta <[email protected]>
> ---
> hw/net/virtio-net.c | 110 ++++++++++++++++++++++++++++-----
> include/hw/virtio/virtio-net.h | 7 +++
> 2 files changed, 100 insertions(+), 17 deletions(-)
>
> diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
> index eccb48ad42..e30aa198f3 100644
> --- a/hw/net/virtio-net.c
> +++ b/hw/net/virtio-net.c
> @@ -157,6 +157,15 @@ static void flush_or_purge_queued_packets(NetClientState
> *nc)
> * - we could suppress RX interrupt if we were so inclined.
> */
>
> +static void virtio_net_rx_timer_cb(void *opaque)
> +{
> + VirtIONet *n = opaque;
> + VirtIODevice *vdev = VIRTIO_DEVICE(n);
> +
> + n->rx_pkt_cnt = 0;
> + virtio_notify(vdev, n->vqs[0].rx_vq);
> +}
> +
> static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
> {
> VirtIONet *n = VIRTIO_NET(vdev);
> @@ -1081,6 +1090,29 @@ static int virtio_net_handle_offloads(VirtIONet *n,
> uint8_t cmd,
> }
> }
>
> +static int virtio_net_handle_coal(VirtIONet *n, uint8_t cmd,
> + struct iovec *iov, unsigned int iov_cnt)
> +{
> + struct virtio_net_ctrl_coal coal;
> + size_t s;
> +
> + s = iov_to_buf(iov, iov_cnt, 0, &coal, sizeof(coal));
> + if (s != sizeof(coal)) {
> + return VIRTIO_NET_ERR;
> + }
> +
> + if (cmd == VIRTIO_NET_CTRL_NOTF_COAL_RX_SET) {
> + n->rx_coal_usecs = le32_to_cpu(coal.max_usecs);
> + n->rx_coal_packets = le32_to_cpu(coal.max_packets);
> + } else if (cmd == VIRTIO_NET_CTRL_NOTF_COAL_TX_SET) {
> + n->tx_coal_usecs = le32_to_cpu(coal.max_usecs);
> + n->tx_coal_packets = le32_to_cpu(coal.max_packets);
> + n->tx_timeout = n->tx_coal_usecs * 1000;
> + }
> +
> + return VIRTIO_NET_OK;
> +}
> +
> static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
> struct iovec *iov, unsigned int iov_cnt)
> {
> @@ -1582,6 +1614,8 @@ size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
> status = virtio_net_handle_mq(n, ctrl.cmd, iov, out_num);
> } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
> status = virtio_net_handle_offloads(n, ctrl.cmd, iov, out_num);
> + } else if (ctrl.class == VIRTIO_NET_CTRL_NOTF_COAL) {
> + status = virtio_net_handle_coal(n, ctrl.cmd, iov, out_num);
> }
>
> s = iov_from_buf(in_sg, in_num, 0, &status, sizeof(status));
> @@ -2041,7 +2075,18 @@ static ssize_t virtio_net_receive_rcu(NetClientState
> *nc, const uint8_t *buf,
> }
>
> virtqueue_flush(q->rx_vq, i);
> - virtio_notify(vdev, q->rx_vq);
> +
> + /* rx coalescing */
> + n->rx_pkt_cnt += i;
> + if (n->rx_coal_usecs == 0 || n->rx_pkt_cnt >= n->rx_coal_packets) {
> + timer_del(n->rx_index_timer);
> + virtio_net_rx_timer_cb(n);
> + } else {
> + if (n->rx_pkt_cnt == i) {
> + timer_mod(n->rx_index_timer,
> + qemu_clock_get_us(QEMU_CLOCK_VIRTUAL) +
> n->rx_coal_usecs);
> + }
> + }
>
> return size;
>
> @@ -2900,6 +2945,7 @@ static void virtio_net_tx_timer(void *opaque)
> if (ret == -EBUSY || ret == -EINVAL) {
> return;
> }
> + n->tx_pkt_cnt -= ret;
> /*
> * If we flush a full burst of packets, assume there are
> * more coming and immediately rearm
> @@ -2919,6 +2965,7 @@ static void virtio_net_tx_timer(void *opaque)
> ret = virtio_net_flush_tx(q);
> if (ret > 0) {
> virtio_queue_set_notification(q->tx_vq, 0);
> + n->tx_pkt_cnt -= ret;
> q->tx_waiting = 1;
> timer_mod(q->tx_timer,
> qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
> @@ -2974,27 +3021,45 @@ static void virtio_net_tx_bh(void *opaque)
> }
> }
>
> -static void virtio_net_add_queue(VirtIONet *n, int index)
> +static void virtio_net_handle_tx_dispatch(VirtIODevice *vdev, VirtQueue *vq)
> {
> - VirtIODevice *vdev = VIRTIO_DEVICE(n);
> + VirtIONet *n = VIRTIO_NET(vdev);
>
> - n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
> - virtio_net_handle_rx);
> + n->tx_pkt_cnt++;
> + bool use_timer = (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) ||
> + (n->tx_coal_usecs > 0) || (n->tx_coal_packets > 0);
>
> - if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
> - n->vqs[index].tx_vq =
> - virtio_add_queue(vdev, n->net_conf.tx_queue_size,
> - virtio_net_handle_tx_timer);
> - n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
> - virtio_net_tx_timer,
> - &n->vqs[index]);
> + if (use_timer && n->tx_pkt_cnt < n->tx_coal_packets) {
> + virtio_net_handle_tx_timer(vdev, vq);
> } else {
> - n->vqs[index].tx_vq =
> - virtio_add_queue(vdev, n->net_conf.tx_queue_size,
> - virtio_net_handle_tx_bh);
> - n->vqs[index].tx_bh = qemu_bh_new_guarded(virtio_net_tx_bh,
> &n->vqs[index],
> -
> &DEVICE(vdev)->mem_reentrancy_guard);
> + n->tx_pkt_cnt = 0;
> + virtio_net_handle_tx_bh(vdev, vq);
> }
> +}
> +
> +static void virtio_net_add_queue(VirtIONet *n, int index)
> +{
> + VirtIODevice *vdev = VIRTIO_DEVICE(n);
> +
> + n->vqs[index].rx_vq =
> + virtio_add_queue(vdev,
> + n->net_conf.rx_queue_size,
> + virtio_net_handle_rx);
> +
> + n->vqs[index].tx_vq =
> + virtio_add_queue(vdev,
> + n->net_conf.tx_queue_size,
> + virtio_net_handle_tx_dispatch);
> +
> + n->vqs[index].tx_timer =
> + timer_new_ns(QEMU_CLOCK_VIRTUAL,
> + virtio_net_tx_timer,
> + &n->vqs[index]);
> +
> + n->vqs[index].tx_bh =
> + qemu_bh_new_guarded(virtio_net_tx_bh,
> + &n->vqs[index],
> + &DEVICE(vdev)->mem_reentrancy_guard);
>
> n->vqs[index].tx_waiting = 0;
> n->vqs[index].n = n;
> @@ -3089,6 +3154,7 @@ static void virtio_net_get_features(VirtIODevice *vdev,
> uint64_t *features,
> virtio_features_or(features, features, n->host_features_ex);
>
> virtio_add_feature_ex(features, VIRTIO_NET_F_MAC);
> + virtio_add_feature_ex(features, VIRTIO_NET_F_NOTF_COAL);
>
> if (!peer_has_vnet_hdr(n)) {
> virtio_clear_feature_ex(features, VIRTIO_NET_F_CSUM);
> @@ -4048,6 +4114,14 @@ static void virtio_net_device_realize(DeviceState
> *dev, Error **errp)
> n->rss_data.specified_hash_types.on_bits |
> n->rss_data.specified_hash_types.auto_bits;
> }
> + n->rx_pkt_cnt = 0;
> + n->tx_pkt_cnt = 0;
> + n->rx_coal_usecs = 0;
> + n->tx_coal_usecs = 0;
> + n->rx_coal_packets = 1;
> + n->tx_coal_packets = 0;
> + n->rx_index_timer = timer_new_us(QEMU_CLOCK_VIRTUAL,
> + virtio_net_rx_timer_cb, n);
> }
>
> static void virtio_net_device_unrealize(DeviceState *dev)
> @@ -4262,6 +4336,8 @@ static const Property virtio_net_properties[] = {
> VIRTIO_NET_F_GUEST_USO6, true),
> DEFINE_PROP_BIT64("host_uso", VirtIONet, host_features,
> VIRTIO_NET_F_HOST_USO, true),
> + DEFINE_PROP_BIT64("vq_notf_coal", VirtIONet, host_features,
> + VIRTIO_NET_F_NOTF_COAL, true),
If we start as "true," we need to deal with migration compatibility,
which is known to be hard even when using machine types.
So I'd suggest starting with "false"
Thanks
> DEFINE_PROP_ON_OFF_AUTO_BIT64("hash-ipv4", VirtIONet,
> rss_data.specified_hash_types,
> VIRTIO_NET_HASH_REPORT_IPv4 - 1,
> diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h
> index 5b8ab7bda7..33d01d080b 100644
> --- a/include/hw/virtio/virtio-net.h
> +++ b/include/hw/virtio/virtio-net.h
> @@ -231,6 +231,13 @@ struct VirtIONet {
> struct EBPFRSSContext ebpf_rss;
> uint32_t nr_ebpf_rss_fds;
> char **ebpf_rss_fds;
> + QEMUTimer *rx_index_timer;
> + uint32_t rx_coal_usecs;
> + uint32_t rx_coal_packets;
> + uint32_t rx_pkt_cnt;
> + uint32_t tx_coal_usecs;
> + uint32_t tx_coal_packets;
> + uint32_t tx_pkt_cnt;
> };
>
> size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
> --
> 2.53.0
>