From: "Maciej S. Szmigiero" <maciej.szmigi...@oracle.com>

Allow capping the maximum total size of in-flight VFIO device state buffers
queued at the destination, otherwise a malicious QEMU source could
theoretically cause the target QEMU to allocate unlimited amounts of memory
for buffers-in-flight.

Since this is not expected to be a realistic threat in most of VFIO live
migration use cases and the right value depends on the particular setup
disable this limit by default by setting it to UINT64_MAX.

Reviewed-by: Fabiano Rosas <faro...@suse.de>
Reviewed-by: Avihai Horon <avih...@nvidia.com>
Signed-off-by: Maciej S. Szmigiero <maciej.szmigi...@oracle.com>
Link: 
https://lore.kernel.org/qemu-devel/4f7cad490988288f58e36b162d7a888ed7e7fd17.1752589295.git.maciej.szmigi...@oracle.com
Signed-off-by: Cédric Le Goater <c...@redhat.com>
---
 docs/devel/migration/vfio.rst | 13 +++++++++++++
 include/hw/vfio/vfio-device.h |  1 +
 hw/vfio/migration-multifd.c   | 21 +++++++++++++++++++--
 hw/vfio/pci.c                 |  9 +++++++++
 4 files changed, 42 insertions(+), 2 deletions(-)

diff --git a/docs/devel/migration/vfio.rst b/docs/devel/migration/vfio.rst
index 
dae3a988307fedf00f83926b96fd3d2b9a1c56f7..0790e5031d8f81b257228f124020586c951647bc
 100644
--- a/docs/devel/migration/vfio.rst
+++ b/docs/devel/migration/vfio.rst
@@ -248,6 +248,19 @@ The multifd VFIO device state transfer is controlled by
 AUTO, which means that VFIO device state transfer via multifd channels is
 attempted in configurations that otherwise support it.
 
+Since the target QEMU needs to load device state buffers in-order it needs to
+queue incoming buffers until they can be loaded into the device.
+This means that a malicious QEMU source could theoretically cause the target
+QEMU to allocate unlimited amounts of memory for such buffers-in-flight.
+
+The "x-migration-max-queued-buffers-size" property allows capping the total 
size
+of these VFIO device state buffers queued at the destination.
+
+Because a malicious QEMU source causing OOM on the target is not expected to be
+a realistic threat in most of VFIO live migration use cases and the right value
+depends on the particular setup by default this queued buffers size limit is
+disabled by setting it to UINT64_MAX.
+
 Some host platforms (like ARM64) require that VFIO device config is loaded only
 after all iterables were loaded, during non-iterables loading phase.
 Such interlocking is controlled by "x-migration-load-config-after-iter" VFIO
diff --git a/include/hw/vfio/vfio-device.h b/include/hw/vfio/vfio-device.h
index 
dac3fdce1539b19937870c0e38027e0a6b6ed1a9..6e4d5ccdac6eaae32fb2d3a59b9c7c85e13e156a
 100644
--- a/include/hw/vfio/vfio-device.h
+++ b/include/hw/vfio/vfio-device.h
@@ -68,6 +68,7 @@ typedef struct VFIODevice {
     OnOffAuto enable_migration;
     OnOffAuto migration_multifd_transfer;
     OnOffAuto migration_load_config_after_iter;
+    uint64_t migration_max_queued_buffers_size;
     bool migration_events;
     bool use_region_fds;
     VFIODeviceOps *ops;
diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c
index 
e539befaa925ac739e3bc87ddb2abbb3d50f7cf5..d522671b8d620ca7a1ebdc0c0a90ba48e9bd741e
 100644
--- a/hw/vfio/migration-multifd.c
+++ b/hw/vfio/migration-multifd.c
@@ -72,6 +72,7 @@ typedef struct VFIOMultifd {
     QemuMutex load_bufs_mutex; /* Lock order: this lock -> BQL */
     uint32_t load_buf_idx;
     uint32_t load_buf_idx_last;
+    size_t load_buf_queued_pending_buffers_size;
 } VFIOMultifd;
 
 static void vfio_state_buffer_clear(gpointer data)
@@ -128,6 +129,7 @@ static bool vfio_load_state_buffer_insert(VFIODevice 
*vbasedev,
     VFIOMigration *migration = vbasedev->migration;
     VFIOMultifd *multifd = migration->multifd;
     VFIOStateBuffer *lb;
+    size_t data_size = packet_total_size - sizeof(*packet);
 
     vfio_state_buffers_assert_init(&multifd->load_bufs);
     if (packet->idx >= vfio_state_buffers_size_get(&multifd->load_bufs)) {
@@ -143,8 +145,19 @@ static bool vfio_load_state_buffer_insert(VFIODevice 
*vbasedev,
 
     assert(packet->idx >= multifd->load_buf_idx);
 
-    lb->data = g_memdup2(&packet->data, packet_total_size - sizeof(*packet));
-    lb->len = packet_total_size - sizeof(*packet);
+    multifd->load_buf_queued_pending_buffers_size += data_size;
+    if (multifd->load_buf_queued_pending_buffers_size >
+        vbasedev->migration_max_queued_buffers_size) {
+        error_setg(errp,
+                   "%s: queuing state buffer %" PRIu32
+                   " would exceed the size max of %" PRIu64,
+                   vbasedev->name, packet->idx,
+                   vbasedev->migration_max_queued_buffers_size);
+        return false;
+    }
+
+    lb->data = g_memdup2(&packet->data, data_size);
+    lb->len = data_size;
     lb->is_present = true;
 
     return true;
@@ -328,6 +341,9 @@ static bool vfio_load_state_buffer_write(VFIODevice 
*vbasedev,
         assert(wr_ret <= buf_len);
         buf_len -= wr_ret;
         buf_cur += wr_ret;
+
+        assert(multifd->load_buf_queued_pending_buffers_size >= wr_ret);
+        multifd->load_buf_queued_pending_buffers_size -= wr_ret;
     }
 
     trace_vfio_load_state_device_buffer_load_end(vbasedev->name,
@@ -497,6 +513,7 @@ static VFIOMultifd *vfio_multifd_new(void)
 
     multifd->load_buf_idx = 0;
     multifd->load_buf_idx_last = UINT32_MAX;
+    multifd->load_buf_queued_pending_buffers_size = 0;
     qemu_cond_init(&multifd->load_bufs_buffer_ready_cond);
 
     multifd->load_bufs_iter_done = false;
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 
09acad002a47fd333d426cee2dcc9cacbbcb2e2f..be05002b9819fafa45cf2fb4d2a0acdc475c558c
 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -3645,6 +3645,8 @@ static const Property vfio_pci_dev_properties[] = {
     DEFINE_PROP_ON_OFF_AUTO("x-migration-load-config-after-iter", 
VFIOPCIDevice,
                             vbasedev.migration_load_config_after_iter,
                             ON_OFF_AUTO_AUTO),
+    DEFINE_PROP_SIZE("x-migration-max-queued-buffers-size", VFIOPCIDevice,
+                     vbasedev.migration_max_queued_buffers_size, UINT64_MAX),
     DEFINE_PROP_BOOL("migration-events", VFIOPCIDevice,
                      vbasedev.migration_events, false),
     DEFINE_PROP_BOOL("x-no-mmap", VFIOPCIDevice, vbasedev.no_mmap, false),
@@ -3828,6 +3830,13 @@ static void vfio_pci_dev_class_init(ObjectClass *klass, 
const void *data)
                                           "non-iterables loading phase) when "
                                           "doing live migration of device 
state "
                                           "via multifd channels");
+    object_class_property_set_description(klass, /* 10.1 */
+                                          
"x-migration-max-queued-buffers-size",
+                                          "Maximum size of in-flight VFIO "
+                                          "device state buffers queued at the "
+                                          "destination when doing live "
+                                          "migration of device state via "
+                                          "multifd channels");
 }
 
 static const TypeInfo vfio_pci_dev_info = {
-- 
2.50.1


Reply via email to