Make sure postcopy threads are released when migrate_cancel is issued. Kick the postcopy_pause semaphores and have the fault thread check 'fault_thread_quit' after it is woken from the pause, so that it can exit and be joined.
While here, fix the comment that mentions userfault_quit_fd so that it refers to userfault_event_fd. Signed-off-by: Fabiano Rosas <faro...@suse.de> --- migration/migration.c | 14 +++++++++++--- migration/postcopy-ram.c | 14 ++++++++++++-- 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index 8c5bd0a75c..07fbb5c9f4 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -105,7 +105,7 @@ static bool migration_object_check(MigrationState *ms, Error **errp); static int migration_maybe_pause(MigrationState *s, int *current_active_state, int new_state); -static void migrate_fd_cancel(MigrationState *s); +static void migrate_fd_cancel(MigrationState *s, MigrationIncomingState *mis); static bool close_return_path_on_source(MigrationState *s); static void migration_completion_end(MigrationState *s); @@ -317,7 +317,7 @@ void migration_cancel(const Error *error) if (migrate_dirty_limit()) { qmp_cancel_vcpu_dirty_limit(false, -1, NULL); } - migrate_fd_cancel(current_migration); + migrate_fd_cancel(current_migration, current_incoming); } void migration_shutdown(void) @@ -1502,7 +1502,7 @@ static void migrate_fd_error(MigrationState *s, const Error *error) migrate_set_error(s, error); } -static void migrate_fd_cancel(MigrationState *s) +static void migrate_fd_cancel(MigrationState *s, MigrationIncomingState *mis) { int old_state ; @@ -1515,6 +1515,12 @@ static void migrate_fd_cancel(MigrationState *s) } } + if (mis->state == MIGRATION_STATUS_POSTCOPY_PAUSED) { + migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_PAUSED, + MIGRATION_STATUS_CANCELLING); + qemu_sem_post(&mis->postcopy_pause_sem_dst); + } + do { old_state = s->state; if (!migration_is_running()) { @@ -1523,6 +1529,8 @@ static void migrate_fd_cancel(MigrationState *s) /* If the migration is paused, kick it out of the pause */ if (old_state == MIGRATION_STATUS_PRE_SWITCHOVER) { qemu_sem_post(&s->pause_sem); + } else if (old_state == MIGRATION_STATUS_POSTCOPY_PAUSED) { + 
qemu_sem_post(&s->postcopy_pause_sem); } migrate_set_state(&s->state, old_state, MIGRATION_STATUS_CANCELLING); } while (s->state != MIGRATION_STATUS_CANCELLING); diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index a535fd2e30..6882ef977d 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -634,6 +634,7 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis) qatomic_set(&mis->fault_thread_quit, 1); postcopy_fault_thread_notify(mis); trace_postcopy_ram_incoming_cleanup_join(); + qemu_sem_post(&mis->postcopy_pause_sem_fault); qemu_thread_join(&mis->fault_thread); if (postcopy_notify(POSTCOPY_NOTIFY_INBOUND_END, &local_err)) { @@ -991,8 +992,7 @@ static void *postcopy_ram_fault_thread(void *opaque) /* * We're mainly waiting for the kernel to give us a faulting HVA, - * however we can be told to quit via userfault_quit_fd which is - * an eventfd + * however we can be told to quit via userfault_event_fd. */ poll_result = poll(pfd, pfd_len, -1 /* Wait forever */); @@ -1008,6 +1008,11 @@ static void *postcopy_ram_fault_thread(void *opaque) * the channel is rebuilt. */ postcopy_pause_fault_thread(mis); + + if (qatomic_read(&mis->fault_thread_quit)) { + trace_postcopy_ram_fault_thread_quit(); + break; + } } if (pfd[1].revents) { @@ -1082,6 +1087,11 @@ retry: if (ret) { /* May be network failure, try to wait for recovery */ postcopy_pause_fault_thread(mis); + + if (qatomic_read(&mis->fault_thread_quit)) { + trace_postcopy_ram_fault_thread_quit(); + break; + } goto retry; } } -- 2.35.3