The VFIO_MIGRATION event notifies users when a VFIO device transitions
to a new state.

One use case for this event is to prevent timeouts for RDMA connections
to the migrated device. In this case, an external management application
(not libvirt) consumes the events and disables the RDMA timeout
mechanism when receiving the event for PRE_COPY_P2P state, which
indicates that the device is non-responsive.

Disabling the timeout is essential because RDMA connections typically
have very low timeouts (tens of milliseconds), which can be far shorter
than the migration downtime.

However, under heavy resource utilization, the device transition to
PRE_COPY_P2P can take hundreds of milliseconds to complete. Since the
VFIO_MIGRATION event is currently sent only after the transition
completes, it arrives too late, after RDMA connections have already
timed out.

To address this, send an additional "prepare" event immediately before
initiating the PRE_COPY_P2P transition. This guarantees timely event
delivery regardless of how long the actual state transition takes.

Signed-off-by: Avihai Horon <[email protected]>
---
Changes from v2 (https://lore.kernel.org/qemu-devel/[email protected]/):
* Renamed prepare-pre-copy-p2p to pre-copy-p2p-prepare
* Renamed prep parameter to prepare in mig_state_to_qapi_state() and
  vfio_migration_send_event()
* Added short explanatory comment before sending the prepare event in
  vfio_migration_set_state()
* Explicitly used VFIO_DEVICE_STATE_PRE_COPY_P2P as parameter for
  vfio_migration_send_event()

Changes from v1 (https://lore.kernel.org/qemu-devel/[email protected]/):
* Removed VFIO_MIGRATION_PREPARE event and instead added a new
  PREPARE_PRE_COPY_P2P state which is sent before PRE_COPY_P2P
  transition
* Added details to commit message
---
 qapi/vfio.json      | 13 +++++++++++--
 hw/vfio/migration.c | 26 +++++++++++++++++++-------
 2 files changed, 30 insertions(+), 9 deletions(-)

diff --git a/qapi/vfio.json b/qapi/vfio.json
index a1a9c5b673..17b6046871 100644
--- a/qapi/vfio.json
+++ b/qapi/vfio.json
@@ -11,7 +11,13 @@
 ##
 # @QapiVfioMigrationState:
 #
-# An enumeration of the VFIO device migration states.
+# An enumeration of the VFIO device migration states.  In addition to
+# the regular states, there are prepare states (with 'prepare' suffix)
+# which indicate that the device is just about to transition to the
+# corresponding state.  Note that seeing a prepare state for state X
+# doesn't guarantee that the next state will be X, as the state
+# transition can fail and the device may transition to a different
+# state instead.
 #
 # @stop: The device is stopped.
 #
@@ -32,11 +38,14 @@
 #     tracking its internal state and its internal state is available
 #     for reading.
 #
+# @pre-copy-p2p-prepare: The device is just about to move to
+#     pre-copy-p2p state.  (since 11.0)
+#
 # Since: 9.1
 ##
 { 'enum': 'QapiVfioMigrationState',
   'data': [ 'stop', 'running', 'stop-copy', 'resuming', 'running-p2p',
-            'pre-copy', 'pre-copy-p2p' ] }
+            'pre-copy', 'pre-copy-p2p', 'pre-copy-p2p-prepare' ] }
 
 ##
 # @VFIO_MIGRATION:
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index b4695030c7..4bd8e24699 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -68,7 +68,7 @@ static const char *mig_state_to_str(enum vfio_device_mig_state state)
 }
 
 static QapiVfioMigrationState
-mig_state_to_qapi_state(enum vfio_device_mig_state state)
+mig_state_to_qapi_state(enum vfio_device_mig_state state, bool prepare)
 {
     switch (state) {
     case VFIO_DEVICE_STATE_STOP:
@@ -84,15 +84,17 @@ mig_state_to_qapi_state(enum vfio_device_mig_state state)
     case VFIO_DEVICE_STATE_PRE_COPY:
         return QAPI_VFIO_MIGRATION_STATE_PRE_COPY;
     case VFIO_DEVICE_STATE_PRE_COPY_P2P:
-        return QAPI_VFIO_MIGRATION_STATE_PRE_COPY_P2P;
+        return prepare ? QAPI_VFIO_MIGRATION_STATE_PRE_COPY_P2P_PREPARE :
+                         QAPI_VFIO_MIGRATION_STATE_PRE_COPY_P2P;
     default:
         g_assert_not_reached();
     }
 }
 
-static void vfio_migration_send_event(VFIODevice *vbasedev)
+static void vfio_migration_send_event(VFIODevice *vbasedev,
+                                      enum vfio_device_mig_state state,
+                                      bool prepare)
 {
-    VFIOMigration *migration = vbasedev->migration;
     DeviceState *dev = vbasedev->dev;
     g_autofree char *qom_path = NULL;
     Object *obj;
@@ -106,8 +108,8 @@ static void vfio_migration_send_event(VFIODevice *vbasedev)
     g_assert(obj);
     qom_path = object_get_canonical_path(obj);
 
-    qapi_event_send_vfio_migration(
-        dev->id, qom_path, mig_state_to_qapi_state(migration->device_state));
+    qapi_event_send_vfio_migration(dev->id, qom_path,
+                                   mig_state_to_qapi_state(state, prepare));
 }
 
 static void vfio_migration_set_device_state(VFIODevice *vbasedev,
@@ -119,7 +121,7 @@ static void vfio_migration_set_device_state(VFIODevice *vbasedev,
                                           mig_state_to_str(state));
 
     migration->device_state = state;
-    vfio_migration_send_event(vbasedev);
+    vfio_migration_send_event(vbasedev, state, false);
 }
 
 int vfio_migration_set_state(VFIODevice *vbasedev,
@@ -146,6 +148,16 @@ int vfio_migration_set_state(VFIODevice *vbasedev,
         return 0;
     }
 
+    /*
+     * Send a prepare event before initiating the PRE_COPY_P2P transition to
+     * ensure timely event delivery regardless of how long the state transition
+     * takes.
+     */
+    if (new_state == VFIO_DEVICE_STATE_PRE_COPY_P2P) {
+        vfio_migration_send_event(vbasedev, VFIO_DEVICE_STATE_PRE_COPY_P2P,
+                                  true);
+    }
+
     feature->argsz = sizeof(buf);
     feature->flags =
         VFIO_DEVICE_FEATURE_SET | VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE;
-- 
2.40.1


Reply via email to