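Strip the backend's vnet_hdr when a packet is buffered and store only the Ethernet frame, so that packets can be re-framed with the netdev's current vnet_hdr_len when they are released. This matters across migration, where vnet_hdr_len can change. Add a custom queue deliver callback that prepends a zeroed vnet_hdr of the current length when that length is non-zero. Also fix the iov trimming: keep the originally allocated iovec pointer for g_free(), because iov_discard_front() advances the working pointer, and do not enqueue a packet that is empty after trimming.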
Signed-off-by: Cindy Lu <[email protected]> --- net/filter-buffer.c | 82 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 78 insertions(+), 4 deletions(-) diff --git a/net/filter-buffer.c b/net/filter-buffer.c index 427da24097..f26b212b06 100644 --- a/net/filter-buffer.c +++ b/net/filter-buffer.c @@ -8,6 +8,7 @@ #include "qemu/osdep.h" #include "net/filter.h" +#include "net/net.h" #include "net/queue.h" #include "qapi/error.h" #include "qemu/timer.h" @@ -28,6 +29,43 @@ struct FilterBufferState { QEMUTimer release_timer; }; +static ssize_t filter_buffer_pass_to_next(NetClientState *sender, + unsigned flags, + const struct iovec *iov, + int iovcnt, + void *opaque) +{ + NetFilterState *nf = opaque; + int vnet_hdr_len = 0; + + /* + * filter-buffer stores Ethernet frames without the vnet_hdr prefix. + * When releasing packets, synthesize a zeroed vnet_hdr of the current + * netdev vnet_hdr_len (if any) so the downstream NIC/backend sees the + * expected framing even across migration where vnet_hdr_len can change. 
+ */ + if (nf && nf->netdev) { + vnet_hdr_len = qemu_get_vnet_hdr_len(nf->netdev); + } + + if (vnet_hdr_len > 0) { + uint8_t vnet_hdr[128] = { 0 }; + g_autofree struct iovec *iov2 = g_new(struct iovec, iovcnt + 1); + + g_assert((size_t)vnet_hdr_len <= sizeof(vnet_hdr)); + iov2[0].iov_base = vnet_hdr; + iov2[0].iov_len = vnet_hdr_len; + memcpy(&iov2[1], iov, iovcnt * sizeof(*iov)); + + return qemu_netfilter_pass_to_next(sender, + QEMU_NET_PACKET_FLAG_NONE, + iov2, iovcnt + 1, opaque); + } + + return qemu_netfilter_pass_to_next(sender, QEMU_NET_PACKET_FLAG_RAW, + iov, iovcnt, opaque); +} + static void filter_buffer_flush(NetFilterState *nf) { FilterBufferState *s = FILTER_BUFFER(nf); @@ -64,6 +102,11 @@ static ssize_t filter_buffer_receive_iov(NetFilterState *nf, NetPacketSent *sent_cb) { FilterBufferState *s = FILTER_BUFFER(nf); + size_t skip = 0; + size_t total; + struct iovec *iov_copy; + struct iovec *iov_copy_base; + unsigned int iovcnt_copy; /* * We return size when buffer a packet, the sender will take it as @@ -79,9 +122,40 @@ static ssize_t filter_buffer_receive_iov(NetFilterState *nf, * the packets without caring about the receiver. This is suboptimal. * May need more thoughts (e.g keeping sent_cb). */ - qemu_net_queue_append_iov(s->incoming_queue, sender, flags, - iov, iovcnt, NULL); - return iov_size(iov, iovcnt); + total = iov_size(iov, iovcnt); + + /* + * The backend (e.g. tap) may prepend a vnet_hdr. Buffer only the Ethernet + * frame so we can re-synthesize the right vnet_hdr_len at release time. 
+ */ + if (!(flags & QEMU_NET_PACKET_FLAG_RAW) && nf->netdev) { + skip = qemu_get_vnet_hdr_len(nf->netdev); + if (skip > total) { + skip = total; + } + } + + iov_copy = g_new(struct iovec, iovcnt); + iov_copy_base = iov_copy; + memcpy(iov_copy, iov, iovcnt * sizeof(*iov_copy)); + iovcnt_copy = iovcnt; + if (skip) { + iov_discard_front(&iov_copy, &iovcnt_copy, skip); + } + if (iovcnt_copy == 0) { + g_free(iov_copy_base); + return total; + } + + /* + * Stored packets are raw Ethernet frames (no vnet_hdr prefix). + */ + qemu_net_queue_append_iov(s->incoming_queue, sender, + QEMU_NET_PACKET_FLAG_RAW, + iov_copy, iovcnt_copy, NULL); + g_free(iov_copy_base); + + return total; } static void filter_buffer_cleanup(NetFilterState *nf) @@ -126,7 +200,7 @@ static void filter_buffer_setup(NetFilterState *nf, Error **errp) return; } - s->incoming_queue = qemu_new_net_queue(qemu_netfilter_pass_to_next, nf); + s->incoming_queue = qemu_new_net_queue(filter_buffer_pass_to_next, nf); filter_buffer_setup_timer(nf); } -- 2.52.0
