Buffer only the Ethernet frame so packets can be re-framed using the current
netdev vnet_hdr_len at release time (important across migration). Add a custom
queue delivery callback to prepend a zeroed vnet_hdr when needed, and fix iov
trimming to avoid freeing adjusted pointers or enqueueing empty packets.

Signed-off-by: Cindy Lu <[email protected]>
---
 net/filter-buffer.c | 82 ++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 78 insertions(+), 4 deletions(-)

diff --git a/net/filter-buffer.c b/net/filter-buffer.c
index 427da24097..f26b212b06 100644
--- a/net/filter-buffer.c
+++ b/net/filter-buffer.c
@@ -8,6 +8,7 @@
 
 #include "qemu/osdep.h"
 #include "net/filter.h"
+#include "net/net.h"
 #include "net/queue.h"
 #include "qapi/error.h"
 #include "qemu/timer.h"
@@ -28,6 +29,43 @@ struct FilterBufferState {
     QEMUTimer release_timer;
 };
 
+static ssize_t filter_buffer_pass_to_next(NetClientState *sender,
+                                          unsigned flags,
+                                          const struct iovec *iov,
+                                          int iovcnt,
+                                          void *opaque)
+{
+    NetFilterState *nf = opaque;
+    int vnet_hdr_len = 0;
+
+    /*
+     * Queue delivery callback. filter-buffer queues Ethernet frames without
+     * a vnet_hdr, so prepend a zeroed header sized by the netdev's current
+     * vnet_hdr_len (it can change across migration) and pass with FLAG_NONE.
+     * With no header, pass as FLAG_RAW. The 'flags' argument is not used.
+     */
+    if (nf && nf->netdev) {
+        vnet_hdr_len = qemu_get_vnet_hdr_len(nf->netdev);
+    }
+
+    if (vnet_hdr_len > 0) {
+        uint8_t vnet_hdr[128] = { 0 };
+        g_autofree struct iovec *iov2 = g_new(struct iovec, iovcnt + 1);
+
+        g_assert((size_t)vnet_hdr_len <= sizeof(vnet_hdr));
+        iov2[0].iov_base = vnet_hdr;
+        iov2[0].iov_len = vnet_hdr_len;
+        memcpy(&iov2[1], iov, iovcnt * sizeof(*iov));
+
+        return qemu_netfilter_pass_to_next(sender,
+                                           QEMU_NET_PACKET_FLAG_NONE,
+                                           iov2, iovcnt + 1, opaque);
+    }
+
+    return qemu_netfilter_pass_to_next(sender, QEMU_NET_PACKET_FLAG_RAW,
+                                       iov, iovcnt, opaque);
+}
+
 static void filter_buffer_flush(NetFilterState *nf)
 {
     FilterBufferState *s = FILTER_BUFFER(nf);
@@ -64,6 +102,11 @@ static ssize_t filter_buffer_receive_iov(NetFilterState *nf,
                                          NetPacketSent *sent_cb)
 {
     FilterBufferState *s = FILTER_BUFFER(nf);
+    size_t skip = 0;
+    size_t total;
+    struct iovec *iov_copy;
+    struct iovec *iov_copy_base;
+    unsigned int iovcnt_copy;
 
     /*
      * We return size when buffer a packet, the sender will take it as
@@ -79,9 +122,40 @@ static ssize_t filter_buffer_receive_iov(NetFilterState *nf,
      * the packets without caring about the receiver. This is suboptimal.
      * May need more thoughts (e.g keeping sent_cb).
      */
-    qemu_net_queue_append_iov(s->incoming_queue, sender, flags,
-                              iov, iovcnt, NULL);
-    return iov_size(iov, iovcnt);
+    total = iov_size(iov, iovcnt);
+
+    /*
+     * The backend (e.g. tap) may prepend a vnet_hdr. Buffer only the Ethernet
+     * frame so we can re-synthesize the right vnet_hdr_len at release time.
+     */
+    if (!(flags & QEMU_NET_PACKET_FLAG_RAW) && nf->netdev) {
+        skip = qemu_get_vnet_hdr_len(nf->netdev);
+        if (skip > total) {
+            skip = total;
+        }
+    }
+
+    iov_copy = g_new(struct iovec, iovcnt);
+    iov_copy_base = iov_copy;
+    memcpy(iov_copy, iov, iovcnt * sizeof(*iov_copy));
+    iovcnt_copy = iovcnt;
+    if (skip) {
+        iov_discard_front(&iov_copy, &iovcnt_copy, skip);
+    }
+    if (iovcnt_copy == 0) {
+        g_free(iov_copy_base);
+        return total;
+    }
+
+    /*
+     * Stored packets are raw Ethernet frames (no vnet_hdr prefix).
+     */
+    qemu_net_queue_append_iov(s->incoming_queue, sender,
+                              QEMU_NET_PACKET_FLAG_RAW,
+                              iov_copy, iovcnt_copy, NULL);
+    g_free(iov_copy_base);
+
+    return total;
 }
 
 static void filter_buffer_cleanup(NetFilterState *nf)
@@ -126,7 +200,7 @@ static void filter_buffer_setup(NetFilterState *nf, Error **errp)
         return;
     }
 
-    s->incoming_queue = qemu_new_net_queue(qemu_netfilter_pass_to_next, nf);
+    s->incoming_queue = qemu_new_net_queue(filter_buffer_pass_to_next, nf);
     filter_buffer_setup_timer(nf);
 }
 
-- 
2.52.0


Reply via email to