On Friday 10 May 2019 12:21 PM, David Gibson wrote: > On Mon, Apr 22, 2019 at 12:33:45PM +0530, Aravinda Prasad wrote: >> Block VM migration requests until the machine check >> error handling is complete as (i) these errors are >> specific to the source hardware and is irrelevant on >> the target hardware, (ii) these errors cause data >> corruption and should be handled before migration. >> >> Signed-off-by: Aravinda Prasad <aravi...@linux.vnet.ibm.com> >> --- >> hw/ppc/spapr_events.c | 17 +++++++++++++++++ >> hw/ppc/spapr_rtas.c | 4 ++++ >> include/hw/ppc/spapr.h | 3 +++ >> 3 files changed, 24 insertions(+) >> >> diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c >> index 4032db0..45b990c 100644 >> --- a/hw/ppc/spapr_events.c >> +++ b/hw/ppc/spapr_events.c >> @@ -41,6 +41,7 @@ >> #include "qemu/bcd.h" >> #include "hw/ppc/spapr_ovec.h" >> #include <libfdt.h> >> +#include "migration/blocker.h" >> >> #define RTAS_LOG_VERSION_MASK 0xff000000 >> #define RTAS_LOG_VERSION_6 0x06000000 >> @@ -864,6 +865,22 @@ static void spapr_mce_dispatch_elog(PowerPCCPU *cpu, >> bool recovered) >> void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered) >> { >> SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); >> + int ret; >> + Error *local_err = NULL; >> + >> + error_setg(&spapr->migration_blocker, >> + "Live migration not supported during machine check handling"); >> + ret = migrate_add_blocker(spapr->migration_blocker, &local_err); >> + if (ret < 0) { >> + /* >> + * We don't want to abort and let the migration to continue. In a >> + * rare case, the machine check handler will run on the target >> + * hardware. Though this is not preferable, it is better than >> aborting >> + * the migration or killing the VM. >> + */ >> + error_free(spapr->migration_blocker); >> + fprintf(stderr, "Warning: Machine check during VM migration\n"); > > Use report_err() instead of a raw fprintf(). > >> + } >> >> while (spapr->mc_status != -1) { >> /* >> diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c >> index 997cf19..1229a0e 100644 >> --- a/hw/ppc/spapr_rtas.c >> +++ b/hw/ppc/spapr_rtas.c >> @@ -50,6 +50,7 @@ >> #include "target/ppc/mmu-hash64.h" >> #include "target/ppc/mmu-book3s-v3.h" >> #include "kvm_ppc.h" >> +#include "migration/blocker.h" >> >> static void rtas_display_character(PowerPCCPU *cpu, SpaprMachineState >> *spapr, >> uint32_t token, uint32_t nargs, >> @@ -396,6 +397,9 @@ static void rtas_ibm_nmi_interlock(PowerPCCPU *cpu, >> spapr->mc_status = -1; >> qemu_cond_signal(&spapr->mc_delivery_cond); >> rtas_st(rets, 0, RTAS_OUT_SUCCESS); >> + migrate_del_blocker(spapr->migration_blocker); >> + error_free(spapr->migration_blocker); >> + spapr->migration_blocker = NULL; >> } >> } >> >> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h >> index 9d16ad1..dda5fd2 100644 >> --- a/include/hw/ppc/spapr.h >> +++ b/include/hw/ppc/spapr.h >> @@ -10,6 +10,7 @@ >> #include "hw/ppc/spapr_irq.h" >> #include "hw/ppc/spapr_xive.h" /* For SpaprXive */ >> #include "hw/ppc/xics.h" /* For ICSState */ >> +#include "qapi/error.h" >> >> struct SpaprVioBus; >> struct SpaprPhbState; >> @@ -213,6 +214,8 @@ struct SpaprMachineState { >> SpaprCapabilities def, eff, mig; >> >> unsigned gpu_numa_id; >> + >> + Error *migration_blocker; > > This name doesn't seem good - it's specific to fwnmi, not any other > migration blockers we might have in future. It also always contains > the same string - could you just initialize that in a global and just > do the migrate_add_blocker() / migrate_del_blocker() instead? I retained it in SpaprMachineState instead of a global variable because we add the blocker in spapr_events.c and delete it in spapr_rtas.c But I have renamed it to fwnmi_migration_blocker. Regards, Aravinda > >> }; >> >> #define H_SUCCESS 0 >> > -- Regards, Aravinda