From: "Maciej S. Szmigiero" <maciej.szmigi...@oracle.com> Allow capping the maximum total size of in-flight VFIO device state buffers queued at the destination, otherwise a malicious QEMU source could theoretically cause the target QEMU to allocate unlimited amounts of memory for buffers-in-flight.
Since this is not expected to be a realistic threat in most of VFIO live migration use cases and the right value depends on the particular setup disable this limit by default by setting it to UINT64_MAX. Reviewed-by: Fabiano Rosas <faro...@suse.de> Reviewed-by: Avihai Horon <avih...@nvidia.com> Signed-off-by: Maciej S. Szmigiero <maciej.szmigi...@oracle.com> Link: https://lore.kernel.org/qemu-devel/4f7cad490988288f58e36b162d7a888ed7e7fd17.1752589295.git.maciej.szmigi...@oracle.com Signed-off-by: Cédric Le Goater <c...@redhat.com> --- docs/devel/migration/vfio.rst | 13 +++++++++++++ include/hw/vfio/vfio-device.h | 1 + hw/vfio/migration-multifd.c | 21 +++++++++++++++++++-- hw/vfio/pci.c | 9 +++++++++ 4 files changed, 42 insertions(+), 2 deletions(-) diff --git a/docs/devel/migration/vfio.rst b/docs/devel/migration/vfio.rst index dae3a988307fedf00f83926b96fd3d2b9a1c56f7..0790e5031d8f81b257228f124020586c951647bc 100644 --- a/docs/devel/migration/vfio.rst +++ b/docs/devel/migration/vfio.rst @@ -248,6 +248,19 @@ The multifd VFIO device state transfer is controlled by AUTO, which means that VFIO device state transfer via multifd channels is attempted in configurations that otherwise support it. +Since the target QEMU needs to load device state buffers in-order it needs to +queue incoming buffers until they can be loaded into the device. +This means that a malicious QEMU source could theoretically cause the target +QEMU to allocate unlimited amounts of memory for such buffers-in-flight. + +The "x-migration-max-queued-buffers-size" property allows capping the total size +of these VFIO device state buffers queued at the destination. + +Because a malicious QEMU source causing OOM on the target is not expected to be +a realistic threat in most of VFIO live migration use cases and the right value +depends on the particular setup by default this queued buffers size limit is +disabled by setting it to UINT64_MAX. + Some host platforms (like ARM64) require that VFIO device config is loaded only after all iterables were loaded, during non-iterables loading phase. Such interlocking is controlled by "x-migration-load-config-after-iter" VFIO diff --git a/include/hw/vfio/vfio-device.h b/include/hw/vfio/vfio-device.h index dac3fdce1539b19937870c0e38027e0a6b6ed1a9..6e4d5ccdac6eaae32fb2d3a59b9c7c85e13e156a 100644 --- a/include/hw/vfio/vfio-device.h +++ b/include/hw/vfio/vfio-device.h @@ -68,6 +68,7 @@ typedef struct VFIODevice { OnOffAuto enable_migration; OnOffAuto migration_multifd_transfer; OnOffAuto migration_load_config_after_iter; + uint64_t migration_max_queued_buffers_size; bool migration_events; bool use_region_fds; VFIODeviceOps *ops; diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c index e539befaa925ac739e3bc87ddb2abbb3d50f7cf5..d522671b8d620ca7a1ebdc0c0a90ba48e9bd741e 100644 --- a/hw/vfio/migration-multifd.c +++ b/hw/vfio/migration-multifd.c @@ -72,6 +72,7 @@ typedef struct VFIOMultifd { QemuMutex load_bufs_mutex; /* Lock order: this lock -> BQL */ uint32_t load_buf_idx; uint32_t load_buf_idx_last; + size_t load_buf_queued_pending_buffers_size; } VFIOMultifd; static void vfio_state_buffer_clear(gpointer data) @@ -128,6 +129,7 @@ static bool vfio_load_state_buffer_insert(VFIODevice *vbasedev, VFIOMigration *migration = vbasedev->migration; VFIOMultifd *multifd = migration->multifd; VFIOStateBuffer *lb; + size_t data_size = packet_total_size - sizeof(*packet); vfio_state_buffers_assert_init(&multifd->load_bufs); if (packet->idx >= vfio_state_buffers_size_get(&multifd->load_bufs)) { @@ -143,8 +145,19 @@ static bool vfio_load_state_buffer_insert(VFIODevice *vbasedev, assert(packet->idx >= multifd->load_buf_idx); - lb->data = g_memdup2(&packet->data, packet_total_size - sizeof(*packet)); - lb->len = packet_total_size - sizeof(*packet); + multifd->load_buf_queued_pending_buffers_size += data_size; + if (multifd->load_buf_queued_pending_buffers_size > + vbasedev->migration_max_queued_buffers_size) { + error_setg(errp, + "%s: queuing state buffer %" PRIu32 + " would exceed the size max of %" PRIu64, + vbasedev->name, packet->idx, + vbasedev->migration_max_queued_buffers_size); + return false; + } + + lb->data = g_memdup2(&packet->data, data_size); + lb->len = data_size; lb->is_present = true; return true; @@ -328,6 +341,9 @@ static bool vfio_load_state_buffer_write(VFIODevice *vbasedev, assert(wr_ret <= buf_len); buf_len -= wr_ret; buf_cur += wr_ret; + + assert(multifd->load_buf_queued_pending_buffers_size >= wr_ret); + multifd->load_buf_queued_pending_buffers_size -= wr_ret; } trace_vfio_load_state_device_buffer_load_end(vbasedev->name, @@ -497,6 +513,7 @@ static VFIOMultifd *vfio_multifd_new(void) multifd->load_buf_idx = 0; multifd->load_buf_idx_last = UINT32_MAX; + multifd->load_buf_queued_pending_buffers_size = 0; qemu_cond_init(&multifd->load_bufs_buffer_ready_cond); multifd->load_bufs_iter_done = false; diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 09acad002a47fd333d426cee2dcc9cacbbcb2e2f..be05002b9819fafa45cf2fb4d2a0acdc475c558c 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -3645,6 +3645,8 @@ static const Property vfio_pci_dev_properties[] = { DEFINE_PROP_ON_OFF_AUTO("x-migration-load-config-after-iter", VFIOPCIDevice, vbasedev.migration_load_config_after_iter, ON_OFF_AUTO_AUTO), + DEFINE_PROP_SIZE("x-migration-max-queued-buffers-size", VFIOPCIDevice, + vbasedev.migration_max_queued_buffers_size, UINT64_MAX), DEFINE_PROP_BOOL("migration-events", VFIOPCIDevice, vbasedev.migration_events, false), DEFINE_PROP_BOOL("x-no-mmap", VFIOPCIDevice, vbasedev.no_mmap, false), @@ -3828,6 +3830,13 @@ static void vfio_pci_dev_class_init(ObjectClass *klass, const void *data) "non-iterables loading phase) when " "doing live migration of device state " "via multifd channels"); + object_class_property_set_description(klass, /* 10.1 */ + "x-migration-max-queued-buffers-size", + "Maximum size of in-flight VFIO " + "device state buffers queued at the " + "destination when doing live " + "migration of device state via " + "multifd channels"); } static const TypeInfo vfio_pci_dev_info = { -- 2.50.1