On Tue, Jan 27, 2026 at 03:03:08PM +0100, Juraj Marcin wrote:
> From: Juraj Marcin <[email protected]>
>
> Currently there is no universal way for the destination to tell the
> source it has started. In precopy it could be deduced from the RP_SHUT
> message and in postcopy from the response to the ping just before the
> POSTCOPY_RUN command, but neither method is precise. Moreover, there is
> no way to send more data after the destination has started with precopy
> migration.
>
> This patch adds a new message type to the return-path which tells the
> source that the destination VM has just started (or can be started if
> autostart is false). The source VM can use this message to precisely
> calculate the downtime regardless of whether postcopy is used, and can
> also send more data, for example network packets.
>
> Signed-off-by: Juraj Marcin <[email protected]>

I do not think it matters that the VM has started, at least not for
the issue in question.
What matters is that a packet is transmitted on behalf of the VM,
on the specific interface.

> ---
> hw/core/machine.c | 4 +++-
> migration/migration.c | 34 ++++++++++++++++++++++++++++++----
> migration/migration.h | 9 +++++++++
> migration/options.c | 8 ++++++++
> migration/options.h | 1 +
> migration/savevm.c | 3 +++
> 6 files changed, 54 insertions(+), 5 deletions(-)
>
> diff --git a/hw/core/machine.c b/hw/core/machine.c
> index 6411e68856..dc73217a5f 100644
> --- a/hw/core/machine.c
> +++ b/hw/core/machine.c
> @@ -38,7 +38,9 @@
> #include "hw/acpi/generic_event_device.h"
> #include "qemu/audio.h"
>
> -GlobalProperty hw_compat_10_2[] = {};
> +GlobalProperty hw_compat_10_2[] = {
> + { "migration", "send-vm-started", "off" },
> +};
> const size_t hw_compat_10_2_len = G_N_ELEMENTS(hw_compat_10_2);
>
> GlobalProperty hw_compat_10_1[] = {
> diff --git a/migration/migration.c b/migration/migration.c
> index b103a82fc0..4871db2365 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -82,6 +82,7 @@ enum mig_rp_message_type {
> MIG_RP_MSG_RECV_BITMAP, /* send recved_bitmap back to source */
> MIG_RP_MSG_RESUME_ACK, /* tell source that we are ready to resume */
> MIG_RP_MSG_SWITCHOVER_ACK, /* Tell source it's OK to do switchover */
> + MIG_RP_MSG_VM_STARTED, /* tell source destination has started */
>
> MIG_RP_MSG_MAX
> };
> @@ -750,6 +751,10 @@ static void process_incoming_migration_bh(void *opaque)
> runstate_set(global_state_get_runstate());
> }
> trace_vmstate_downtime_checkpoint("dst-precopy-bh-vm-started");
> + if (mis->to_src_file && migrate_send_vm_started()) {
> + migrate_send_rp_vm_started(mis);
> + }
> +
> /*
> * This must happen after any state changes since as soon as an external
> * observer sees this event they might start to prod at the VM assuming
> @@ -996,6 +1001,11 @@ void migrate_send_rp_resume_ack(MigrationIncomingState *mis, uint32_t value)
> migrate_send_rp_message(mis, MIG_RP_MSG_RESUME_ACK, sizeof(buf), &buf);
> }
>
> +void migrate_send_rp_vm_started(MigrationIncomingState *mis)
> +{
> + migrate_send_rp_message(mis, MIG_RP_MSG_VM_STARTED, 0, NULL);
> +}
> +
> bool migration_is_running(void)
> {
> MigrationState *s = current_migration;
> @@ -1660,6 +1670,9 @@ int migrate_init(MigrationState *s, Error **errp)
> s->postcopy_package_loaded = false;
> qemu_event_reset(&s->postcopy_package_loaded_event);
>
> + s->dest_vm_started = false;
> + qemu_event_reset(&s->dest_vm_started_event);
> +
> return 0;
> }
>
> @@ -2368,6 +2381,12 @@ static void *source_return_path_thread(void *opaque)
> trace_source_return_path_thread_switchover_acked();
> break;
>
> + case MIG_RP_MSG_VM_STARTED:
> + migration_downtime_end(ms);
> + ms->dest_vm_started = true;
> + qemu_event_set(&ms->dest_vm_started_event);
> + break;
> +
> default:
> break;
> }
> @@ -2591,7 +2610,9 @@ static int postcopy_start(MigrationState *ms, Error **errp)
> */
> migration_call_notifiers(MIG_EVENT_PRECOPY_DONE, NULL);
>
> - migration_downtime_end(ms);
> + if (!ms->rp_state.rp_thread_created || !migrate_send_vm_started()) {
> + migration_downtime_end(ms);
> + }
>
> if (migrate_postcopy_ram()) {
> /*
> @@ -3086,7 +3107,9 @@ static void migration_completion_end(MigrationState *s)
> * - correct ordering of s->mbps update vs. s->state;
> */
> bql_lock();
> - migration_downtime_end(s);
> + if (!s->rp_state.rp_thread_created || !migrate_send_vm_started()) {
> + migration_downtime_end(s);
> + }
> s->total_time = end_time - s->start_time;
> transfer_time = s->total_time - s->setup_time;
> if (transfer_time) {
> @@ -3300,9 +3323,10 @@ static void migration_iteration_finish(MigrationState *s)
> case MIGRATION_STATUS_FAILED:
> case MIGRATION_STATUS_CANCELLED:
> case MIGRATION_STATUS_CANCELLING:
> - if (!migration_block_activate(&local_err)) {
> + if (s->dest_vm_started || !migration_block_activate(&local_err)) {
> /*
> - * Re-activate the block drives if they're inactivated.
> + * Re-activate the block drives if they're inactivated and the dest
> + * vm has not reported that it has started.
> *
> * If it fails (e.g. in case of a split brain, where dest QEMU
> * might have taken some of the drive locks and running!), do
> @@ -3853,6 +3877,7 @@ static void migration_instance_finalize(Object *obj)
> qemu_sem_destroy(&ms->postcopy_qemufile_src_sem);
> error_free(ms->error);
> qemu_event_destroy(&ms->postcopy_package_loaded_event);
> + qemu_event_destroy(&ms->dest_vm_started_event);
> }
>
> static void migration_instance_init(Object *obj)
> @@ -3875,6 +3900,7 @@ static void migration_instance_init(Object *obj)
> qemu_sem_init(&ms->postcopy_qemufile_src_sem, 0);
> qemu_mutex_init(&ms->qemu_file_lock);
> qemu_event_init(&ms->postcopy_package_loaded_event, 0);
> + qemu_event_init(&ms->dest_vm_started_event, false);
> }
>
> /*
> diff --git a/migration/migration.h b/migration/migration.h
> index b6888daced..a3fab4f27e 100644
> --- a/migration/migration.h
> +++ b/migration/migration.h
> @@ -522,6 +522,14 @@ struct MigrationState {
> * anything as input.
> */
> bool has_block_bitmap_mapping;
> +
> + /*
> + * Send the VM_STARTED message on the return-path when the dest VM
> + * finishes loading device state and switches out of the INMIGRATE
> + * run state.
> + */
> + bool send_vm_started;
> + bool dest_vm_started;
> + QemuEvent dest_vm_started_event;
> };
>
> void migrate_set_state(MigrationStatus *state, MigrationStatus old_state,
> @@ -564,6 +572,7 @@ void migrate_send_rp_recv_bitmap(MigrationIncomingState *mis,
> char *block_name);
> void migrate_send_rp_resume_ack(MigrationIncomingState *mis, uint32_t value);
> int migrate_send_rp_switchover_ack(MigrationIncomingState *mis);
> +void migrate_send_rp_vm_started(MigrationIncomingState *mis);
>
> void dirty_bitmap_mig_before_vm_start(void);
> void dirty_bitmap_mig_cancel_outgoing(void);
> diff --git a/migration/options.c b/migration/options.c
> index 1ffe85a2d8..a5a233183b 100644
> --- a/migration/options.c
> +++ b/migration/options.c
> @@ -108,6 +108,7 @@ const Property migration_properties[] = {
> preempt_pre_7_2, false),
> DEFINE_PROP_BOOL("multifd-clean-tls-termination", MigrationState,
> multifd_clean_tls_termination, true),
> + DEFINE_PROP_BOOL("send-vm-started", MigrationState, send_vm_started,
> true),
>
> /* Migration parameters */
> DEFINE_PROP_UINT8("x-throttle-trigger-threshold", MigrationState,
> @@ -434,6 +435,13 @@ bool migrate_zero_copy_send(void)
> return s->capabilities[MIGRATION_CAPABILITY_ZERO_COPY_SEND];
> }
>
> +bool migrate_send_vm_started(void)
> +{
> + MigrationState *s = migrate_get_current();
> +
> + return s->send_vm_started;
> +}
> +
> /* pseudo capabilities */
>
> bool migrate_multifd_flush_after_each_section(void)
> diff --git a/migration/options.h b/migration/options.h
> index b502871097..5fdc8fc6fe 100644
> --- a/migration/options.h
> +++ b/migration/options.h
> @@ -42,6 +42,7 @@ bool migrate_return_path(void);
> bool migrate_validate_uuid(void);
> bool migrate_xbzrle(void);
> bool migrate_zero_copy_send(void);
> +bool migrate_send_vm_started(void);
>
> /*
> * pseudo capabilities
> diff --git a/migration/savevm.c b/migration/savevm.c
> index 3dc812a7bb..1020094fc8 100644
> --- a/migration/savevm.c
> +++ b/migration/savevm.c
> @@ -2157,6 +2157,9 @@ static void loadvm_postcopy_handle_run_bh(void *opaque)
> }
>
> trace_vmstate_downtime_checkpoint("dst-postcopy-bh-vm-started");
> + if (mis->to_src_file && migrate_send_vm_started()) {
> + migrate_send_rp_vm_started(mis);
> + }
> }
>
> /* After all discards we can start running and asking for pages */
> --
> 2.52.0