On Mon, Dec 02, 2024 at 07:01:36PM -0300, Fabiano Rosas wrote: > If the destination side fails at migration_ioc_process_incoming() > before starting the coroutine, it will report the error but QEMU will > not exit. > > Set the migration state to FAILED and exit the process if > exit-on-error allows. > > CC: Thomas Huth <th...@redhat.com> > Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2633 > Reported-by: Daniel P. Berrangé <berra...@redhat.com> > Signed-off-by: Fabiano Rosas <faro...@suse.de>
(I skipped the postcopy patches as of now, until we finish the discussion in patch 2) > --- > migration/channel.c | 11 ++++++----- > migration/migration.c | 31 ++++++++++++++++++------------- > migration/migration.h | 2 +- > 3 files changed, 25 insertions(+), 19 deletions(-) > > diff --git a/migration/channel.c b/migration/channel.c > index f9de064f3b..6d7f9172d8 100644 > --- a/migration/channel.c > +++ b/migration/channel.c > @@ -40,13 +40,14 @@ void migration_channel_process_incoming(QIOChannel *ioc) > > if (migrate_channel_requires_tls_upgrade(ioc)) { > migration_tls_channel_process_incoming(s, ioc, &local_err); > + > + if (local_err) { > + error_report_err(local_err); > + } What if tls processing failed here, do we have similar issue that qemu will stall? Do we want to cover that too? > + > } else { > migration_ioc_register_yank(ioc); > - migration_ioc_process_incoming(ioc, &local_err); > - } > - > - if (local_err) { > - error_report_err(local_err); > + migration_ioc_process_incoming(ioc); > } > } > > diff --git a/migration/migration.c b/migration/migration.c > index 8a61cc26d7..cd88ebc875 100644 > --- a/migration/migration.c > +++ b/migration/migration.c > @@ -943,7 +943,7 @@ static bool migration_should_start_incoming(bool > main_channel) > return true; > } > > -void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp) > +void migration_ioc_process_incoming(QIOChannel *ioc) > { > MigrationIncomingState *mis = migration_incoming_get_current(); > Error *local_err = NULL; > @@ -966,10 +966,9 @@ void migration_ioc_process_incoming(QIOChannel *ioc, > Error **errp) > * issue is not possible. > */ > ret = migration_channel_read_peek(ioc, (void *)&channel_magic, > - sizeof(channel_magic), errp); > - > + sizeof(channel_magic), &local_err); > if (ret != 0) { > - return; > + goto err; > } > > default_channel = (channel_magic == cpu_to_be32(QEMU_VM_FILE_MAGIC)); > @@ -977,8 +976,8 @@ void migration_ioc_process_incoming(QIOChannel *ioc, > Error **errp) > default_channel = !mis->from_src_file; > } > > - if (multifd_recv_setup(errp) != 0) { > - return; > + if (multifd_recv_setup(&local_err) != 0) { > + goto err; > } > > if (default_channel) { > @@ -995,18 +994,24 @@ void migration_ioc_process_incoming(QIOChannel *ioc, > Error **errp) > postcopy_preempt_new_channel(mis, f); > } > if (local_err) { > - error_propagate(errp, local_err); > - return; > + goto err; > } > } > > - if (migration_should_start_incoming(default_channel)) { > - /* If it's a recovery, we're done */ > - if (postcopy_try_recover()) { > - return; > - } > + if (migration_should_start_incoming(default_channel) && > + !postcopy_try_recover()) { > migration_incoming_process(); > } > + > + return; > + > +err: > + error_report_err(local_err); > + migrate_set_state(&mis->state, MIGRATION_STATUS_SETUP, > + MIGRATION_STATUS_FAILED); > + if (mis->exit_on_error) { > + exit(EXIT_FAILURE); > + } > } > > /** > diff --git a/migration/migration.h b/migration/migration.h > index 0956e9274b..c367e5ea40 100644 > --- a/migration/migration.h > +++ b/migration/migration.h > @@ -477,7 +477,7 @@ void migrate_set_state(MigrationStatus *state, > MigrationStatus old_state, > MigrationStatus new_state); > > void migration_fd_process_incoming(QEMUFile *f); > -void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp); > +void migration_ioc_process_incoming(QIOChannel *ioc); > void migration_incoming_process(void); > > bool migration_has_all_channels(void); > -- > 2.35.3 > -- Peter Xu