On 10/23/2020 12:21 AM, Alex Williamson wrote:
On Thu, 22 Oct 2020 16:41:57 +0530
Kirti Wankhede <[email protected]> wrote:
Define flags to be used as delimiter in migration stream for VFIO devices.
Added .save_setup and .save_cleanup functions. Map & unmap migration
region from these functions at source during saving or pre-copy phase.
Set VFIO device state depending on VM's state. During live migration, VM is
running when .save_setup is called, _SAVING | _RUNNING state is set for VFIO
device. During save-restore, VM is paused, _SAVING state is set for VFIO device.
Signed-off-by: Kirti Wankhede <[email protected]>
Reviewed-by: Neo Jia <[email protected]>
---
hw/vfio/migration.c | 96 ++++++++++++++++++++++++++++++++++++++++++++++++++++
hw/vfio/trace-events | 2 ++
2 files changed, 98 insertions(+)
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index 7c4fa0d08ea6..2e1054bf7f43 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -8,12 +8,15 @@
*/
#include "qemu/osdep.h"
+#include "qemu/main-loop.h"
+#include "qemu/cutils.h"
#include <linux/vfio.h>
#include "sysemu/runstate.h"
#include "hw/vfio/vfio-common.h"
#include "cpu.h"
#include "migration/migration.h"
+#include "migration/vmstate.h"
#include "migration/qemu-file.h"
#include "migration/register.h"
#include "migration/blocker.h"
@@ -25,6 +28,22 @@
#include "trace.h"
#include "hw/hw.h"
+/*
+ * Flags to be used as unique delimiters for VFIO devices in the migration
+ * stream. These flags are composed as:
+ * 0xffffffff => MSB 32-bit all 1s
+ * 0xef10 => Magic ID, represents emulated (virtual) function IO
+ * 0x0000 => 16-bits reserved for flags
+ *
+ * The beginning of a state section is marked by _DEV_CONFIG_STATE,
+ * _DEV_SETUP_STATE, or _DEV_DATA_STATE. The end of each state section
+ * is marked by _END_OF_STATE.
+ */
+#define VFIO_MIG_FLAG_END_OF_STATE (0xffffffffef100001ULL)
+#define VFIO_MIG_FLAG_DEV_CONFIG_STATE (0xffffffffef100002ULL)
+#define VFIO_MIG_FLAG_DEV_SETUP_STATE (0xffffffffef100003ULL)
+#define VFIO_MIG_FLAG_DEV_DATA_STATE (0xffffffffef100004ULL)
+
static inline int vfio_mig_access(VFIODevice *vbasedev, void *val, int count,
off_t off, bool iswrite)
{
@@ -129,6 +148,69 @@ static int vfio_migration_set_state(VFIODevice *vbasedev,
uint32_t mask,
return 0;
}
+/* ---------------------------------------------------------------------- */
+
+/*
+ * .save_setup handler: runs in the migration thread at the start of the
+ * save/pre-copy phase. Writes the _DEV_SETUP_STATE delimiter, maps the
+ * device's migration region (falling back to the slow read/write path on
+ * failure), and adds _SAVING to the device state.
+ * Returns 0 on success, negative errno on failure.
+ */
+static int vfio_save_setup(QEMUFile *f, void *opaque)
+{
+    VFIODevice *vbasedev = opaque;
+    VFIOMigration *migration = vbasedev->migration;
+    int ret;
+
+    trace_vfio_save_setup(vbasedev->name);
+
+    qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE);
+
+    if (migration->region.mmaps) {
+        /*
+         * Calling vfio_region_mmap() from migration thread. Memory APIs
+         * called from this function require locking the iothread when
+         * called from outside the main loop thread.
+         */
+        qemu_mutex_lock_iothread();
+        ret = vfio_region_mmap(&migration->region);
+        qemu_mutex_unlock_iothread();
+        if (ret) {
+            /* mmap failure is not fatal: fall back to region read/write */
+            error_report("%s: Failed to mmap VFIO migration region: %s",
+                         vbasedev->name, strerror(-ret));
+            error_report("%s: Falling back to slow path", vbasedev->name);
+        }
+    }
+
+    ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_MASK,
+                                   VFIO_DEVICE_STATE_SAVING);
+    if (ret) {
+        error_report("%s: Failed to set state SAVING", vbasedev->name);
+        return ret;
+    }
+
+    qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
+
+    ret = qemu_file_get_error(f);
+    if (ret) {
+        return ret;
+    }
+
+    return 0;
+}
+
+/*
+ * .save_cleanup handler: unmaps the migration region mapped by
+ * vfio_save_setup().
+ */
+static void vfio_save_cleanup(void *opaque)
+{
+    VFIODevice *vbasedev = opaque;
+    VFIOMigration *migration = vbasedev->migration;
+
+    /*
+     * No explicit iothread locking needed here: unlike
+     * qemu_savevm_state_setup(), qemu_savevm_state_cleanup() is called
+     * with the iothread lock already held.
+     */
+    if (migration->region.mmaps) {
+        vfio_region_unmap(&migration->region);
+    }
+    trace_vfio_save_cleanup(vbasedev->name);
+}
+
+/* SaveVMHandlers for VFIO devices; only the source (save) side so far. */
+static SaveVMHandlers savevm_vfio_handlers = {
+ .save_setup = vfio_save_setup,
+ .save_cleanup = vfio_save_cleanup,
+};
+
+/* ---------------------------------------------------------------------- */
+
static void vfio_vmstate_change(void *opaque, int running, RunState state)
{
VFIODevice *vbasedev = opaque;
@@ -219,6 +301,8 @@ static int vfio_migration_init(VFIODevice *vbasedev,
int ret;
Object *obj;
VFIOMigration *migration;
+ char id[256] = "";
+ g_autofree char *path = NULL, *oid;
AIUI, oid must also be initialized as a g_autofree variable.
if (!vbasedev->ops->vfio_get_object) {
return -EINVAL;
@@ -248,6 +332,18 @@ static int vfio_migration_init(VFIODevice *vbasedev,
vbasedev->migration = migration;
migration->vbasedev = vbasedev;
+
+ oid = vmstate_if_get_id(VMSTATE_IF(DEVICE(obj)));
+ if (oid) {
+ path = g_strdup_printf("%s/vfio", oid);
+ } else {
+ path = g_strdup("vfio");
If we get here then all vfio devices have the same id string. Isn't
that a problem? Thanks,
Most bus types have the get_dev_path() callback implemented, which is
called from get_id, so there is very little chance of getting here.
With above change, id string we get looks like '0000:00:04.0/vfio',
trace logs below:
qemu_loadvm_state_section_startfull 61.942 pid=625231 section_id=0x2f
idstr=b'0000:00:04.0/vfio' instance_id=0x0 version_id=0x1
qemu_loadvm_state_section_startfull 1.242 pid=625231 section_id=0x30
idstr=b'0000:00:05.0/vfio' instance_id=0x0 version_id=0x1
where '0000:00:04.0' shows the device's location within the guest, so that
it gets preserved and used during resume.
In the worst case, when it is not present, idstr remains the same but
instance_id changes:
qemu_loadvm_state_section_startfull 54.931 pid=609474 section_id=0x2f
idstr=b'vfio' instance_id=0x0 version_id=0x1
qemu_loadvm_state_section_startfull 1.180 pid=609474 section_id=0x30
idstr=b'vfio' instance_id=0x1 version_id=0x1
But there is no other way to know location of the device within guest.
Dave, any suggestions here?
Thanks,
Kirti
Alex
+ }
+ strpadcpy(id, sizeof(id), path, '\0');
+
+ register_savevm_live(id, VMSTATE_INSTANCE_ID_ANY, 1, &savevm_vfio_handlers,
+ vbasedev);
+
migration->vm_state =
qemu_add_vm_change_state_handler(vfio_vmstate_change,
vbasedev);
migration->migration_state.notify = vfio_migration_state_notifier;
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
index 78d7d83b5ef8..f148b5e828c1 100644
--- a/hw/vfio/trace-events
+++ b/hw/vfio/trace-events
@@ -151,3 +151,5 @@ vfio_migration_probe(const char *name, uint32_t index) " (%s)
Region %d"
vfio_migration_set_state(const char *name, uint32_t state) " (%s) state %d"
vfio_vmstate_change(const char *name, int running, const char *reason, uint32_t
dev_state) " (%s) running %d reason %s device state %d"
vfio_migration_state_notifier(const char *name, const char *state) " (%s) state
%s"
+vfio_save_setup(const char *name) " (%s)"
+vfio_save_cleanup(const char *name) " (%s)"