On Tue, Jun 04, 2019 at 11:01:19AM +0200, Greg Kurz wrote: > On Tue, 4 Jun 2019 11:59:13 +0530 > Aravinda Prasad <aravi...@linux.vnet.ibm.com> wrote: > > > On Monday 03 June 2019 07:30 PM, Greg Kurz wrote: > > > On Wed, 29 May 2019 11:10:40 +0530 > > > Aravinda Prasad <aravi...@linux.vnet.ibm.com> wrote: > > > > > >> Upon a machine check exception (MCE) in a guest address space, > > >> KVM causes a guest exit to enable QEMU to build and pass the > > >> error to the guest in the PAPR defined rtas error log format. > > >> > > >> This patch builds the rtas error log, copies it to the rtas_addr > > >> and then invokes the guest registered machine check handler. The > > >> handler in the guest takes suitable action(s) depending on the type > > >> and criticality of the error. For example, if an error is > > >> unrecoverable memory corruption in an application inside the > > >> guest, then the guest kernel sends a SIGBUS to the application. > > >> For recoverable errors, the guest performs recovery actions and > > >> logs the error. > > >> > > >> Signed-off-by: Aravinda Prasad <aravi...@linux.vnet.ibm.com> > > >> --- > > >> hw/ppc/spapr.c | 5 + > > >> hw/ppc/spapr_events.c | 236 > > >> ++++++++++++++++++++++++++++++++++++++++++++++++ > > >> include/hw/ppc/spapr.h | 4 + > > >> 3 files changed, 245 insertions(+) > > >> > > >> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c > > >> index 6b6c962..c97f6a6 100644 > > >> --- a/hw/ppc/spapr.c > > >> +++ b/hw/ppc/spapr.c > > >> @@ -2910,6 +2910,11 @@ static void spapr_machine_init(MachineState > > >> *machine) > > >> error_report("Could not get size of LPAR rtas '%s'", filename); > > >> exit(1); > > >> } > > >> + > > >> + /* Resize blob to accommodate error log. */ > > >> + g_assert(spapr->rtas_size < RTAS_ERROR_LOG_OFFSET); > > > > > > I don't see the point of this assertion... especially with the assignment > > > below. 
> > > > It is required because we want to ensure that the rtas image size is > > less than RTAS_ERROR_LOG_OFFSET, or else we will overwrite the rtas > > image with the rtas error log when we hit a machine check exception. But I think a > > comment in the code will help. Will add it. > > I'd rather exit QEMU properly instead of aborting then. Also this is only > needed if the guest has a chance to use FWNMI, i.e. the spapr cap is > set.
I think assert() is appropriate in this case. If it fails, it means something is wrong in the code, not with the configuration. > > > > > > > > >> + spapr->rtas_size = RTAS_ERROR_LOG_MAX; > > > > > > As requested by David, this should only be done when the spapr cap is set, > > > so that 4.0 machine types and older continue to use the current size. > > > > Due to the other issue of re-allocating the blob and as this is not that > > much space, we agreed to keep the size to RTAS_ERROR_LOG_MAX always. > > > > Link to the discussion on this: > > http://lists.nongnu.org/archive/html/qemu-ppc/2019-05/msg00275.html > > > > Indeed, and in the next mail in that thread, David writes: > > > No, that's not right. It's impractical to change the allocation > > depending on whether fwnmi is currently active. But you *can* (and > > should) base the allocation on whether fwnmi is *possible* - that is, > > the value of the spapr cap. > > i.e., allocate RTAS_ERROR_LOG_MAX when the spapr cap is set, allocate > the file size otherwise. 
> > > > > > >> + > > >> spapr->rtas_blob = g_malloc(spapr->rtas_size); > > >> if (load_image_size(filename, spapr->rtas_blob, spapr->rtas_size) < > > >> 0) { > > >> error_report("Could not load LPAR rtas '%s'", filename); > > >> diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c > > >> index a18446b..573c0b7 100644 > > >> --- a/hw/ppc/spapr_events.c > > >> +++ b/hw/ppc/spapr_events.c > > >> @@ -212,6 +212,106 @@ struct hp_extended_log { > > >> struct rtas_event_log_v6_hp hp; > > >> } QEMU_PACKED; > > >> > > >> +struct rtas_event_log_v6_mc { > > >> +#define RTAS_LOG_V6_SECTION_ID_MC 0x4D43 /* MC */ > > >> + struct rtas_event_log_v6_section_header hdr; > > >> + uint32_t fru_id; > > >> + uint32_t proc_id; > > >> + uint8_t error_type; > > >> +#define RTAS_LOG_V6_MC_TYPE_UE 0 > > >> +#define RTAS_LOG_V6_MC_TYPE_SLB 1 > > >> +#define RTAS_LOG_V6_MC_TYPE_ERAT 2 > > >> +#define RTAS_LOG_V6_MC_TYPE_TLB 4 > > >> +#define RTAS_LOG_V6_MC_TYPE_D_CACHE 5 > > >> +#define RTAS_LOG_V6_MC_TYPE_I_CACHE 7 > > >> + uint8_t sub_err_type; > > >> +#define RTAS_LOG_V6_MC_UE_INDETERMINATE 0 > > >> +#define RTAS_LOG_V6_MC_UE_IFETCH 1 > > >> +#define RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_IFETCH 2 > > >> +#define RTAS_LOG_V6_MC_UE_LOAD_STORE 3 > > >> +#define RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_LOAD_STORE 4 > > >> +#define RTAS_LOG_V6_MC_SLB_PARITY 0 > > >> +#define RTAS_LOG_V6_MC_SLB_MULTIHIT 1 > > >> +#define RTAS_LOG_V6_MC_SLB_INDETERMINATE 2 > > >> +#define RTAS_LOG_V6_MC_ERAT_PARITY 1 > > >> +#define RTAS_LOG_V6_MC_ERAT_MULTIHIT 2 > > >> +#define RTAS_LOG_V6_MC_ERAT_INDETERMINATE 3 > > >> +#define RTAS_LOG_V6_MC_TLB_PARITY 1 > > >> +#define RTAS_LOG_V6_MC_TLB_MULTIHIT 2 > > >> +#define RTAS_LOG_V6_MC_TLB_INDETERMINATE 3 > > >> + uint8_t reserved_1[6]; > > >> + uint64_t effective_address; > > >> + uint64_t logical_address; > > >> +} QEMU_PACKED; > > >> + > > >> +struct mc_extended_log { > > >> + struct rtas_event_log_v6 v6hdr; > > >> + struct rtas_event_log_v6_mc mc; > > >> +} 
QEMU_PACKED; > > >> + > > >> +struct MC_ierror_table { > > >> + unsigned long srr1_mask; > > >> + unsigned long srr1_value; > > >> + bool nip_valid; /* nip is a valid indicator of faulting address */ > > >> + uint8_t error_type; > > >> + uint8_t error_subtype; > > >> + unsigned int initiator; > > >> + unsigned int severity; > > >> +}; > > >> + > > >> +static const struct MC_ierror_table mc_ierror_table[] = { > > >> +{ 0x00000000081c0000, 0x0000000000040000, true, > > >> + RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_IFETCH, > > >> + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, > > >> +{ 0x00000000081c0000, 0x0000000000080000, true, > > >> + RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_PARITY, > > >> + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, > > >> +{ 0x00000000081c0000, 0x00000000000c0000, true, > > >> + RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_MULTIHIT, > > >> + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, > > >> +{ 0x00000000081c0000, 0x0000000000100000, true, > > >> + RTAS_LOG_V6_MC_TYPE_ERAT, RTAS_LOG_V6_MC_ERAT_MULTIHIT, > > >> + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, > > >> +{ 0x00000000081c0000, 0x0000000000140000, true, > > >> + RTAS_LOG_V6_MC_TYPE_TLB, RTAS_LOG_V6_MC_TLB_MULTIHIT, > > >> + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, > > >> +{ 0x00000000081c0000, 0x0000000000180000, true, > > >> + RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_IFETCH, > > >> + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, > > >> +{ 0, 0, 0, 0, 0, 0 } }; > > >> + > > >> +struct MC_derror_table { > > >> + unsigned long dsisr_value; > > >> + bool dar_valid; /* dar is a valid indicator of faulting address */ > > >> + uint8_t error_type; > > >> + uint8_t error_subtype; > > >> + unsigned int initiator; > > >> + unsigned int severity; > > >> +}; > > >> + > > >> +static const struct MC_derror_table mc_derror_table[] = { > > >> +{ 0x00008000, false, > > >> + RTAS_LOG_V6_MC_TYPE_UE, 
RTAS_LOG_V6_MC_UE_LOAD_STORE, > > >> + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, > > >> +{ 0x00004000, true, > > >> + RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_LOAD_STORE, > > >> + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, > > >> +{ 0x00000800, true, > > >> + RTAS_LOG_V6_MC_TYPE_ERAT, RTAS_LOG_V6_MC_ERAT_MULTIHIT, > > >> + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, > > >> +{ 0x00000400, true, > > >> + RTAS_LOG_V6_MC_TYPE_TLB, RTAS_LOG_V6_MC_TLB_MULTIHIT, > > >> + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, > > >> +{ 0x00000080, true, > > >> + RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_MULTIHIT, /* Before > > >> PARITY */ > > >> + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, > > >> +{ 0x00000100, true, > > >> + RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_PARITY, > > >> + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, > > >> +{ 0, false, 0, 0, 0, 0 } }; > > >> + > > >> +#define SRR1_MC_LOADSTORE(srr1) ((srr1) & PPC_BIT(42)) > > >> + > > >> typedef enum EventClass { > > >> EVENT_CLASS_INTERNAL_ERRORS = 0, > > >> EVENT_CLASS_EPOW = 1, > > >> @@ -620,6 +720,138 @@ void > > >> spapr_hotplug_req_remove_by_count_indexed(SpaprDrcType drc_type, > > >> RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, > > >> &drc_id); > > >> } > > >> > > >> +static uint32_t spapr_mce_get_elog_type(PowerPCCPU *cpu, bool recovered, > > >> + struct mc_extended_log > > >> *ext_elog) > > >> +{ > > >> + int i; > > >> + CPUPPCState *env = &cpu->env; > > >> + uint32_t summary; > > >> + uint64_t dsisr = env->spr[SPR_DSISR]; > > >> + > > >> + summary = RTAS_LOG_VERSION_6 | RTAS_LOG_OPTIONAL_PART_PRESENT; > > >> + if (recovered) { > > >> + summary |= RTAS_LOG_DISPOSITION_FULLY_RECOVERED; > > >> + } else { > > >> + summary |= RTAS_LOG_DISPOSITION_NOT_RECOVERED; > > >> + } > > >> + > > >> + if (SRR1_MC_LOADSTORE(env->spr[SPR_SRR1])) { > > >> + for (i = 0; mc_derror_table[i].dsisr_value; i++) { > > >> + if (!(dsisr & 
mc_derror_table[i].dsisr_value)) { > > >> + continue; > > >> + } > > >> + > > >> + ext_elog->mc.error_type = mc_derror_table[i].error_type; > > >> + ext_elog->mc.sub_err_type = > > >> mc_derror_table[i].error_subtype; > > >> + if (mc_derror_table[i].dar_valid) { > > >> + ext_elog->mc.effective_address = > > >> cpu_to_be64(env->spr[SPR_DAR]); > > >> + } > > >> + > > >> + summary |= mc_derror_table[i].initiator > > >> + | mc_derror_table[i].severity; > > >> + > > >> + return summary; > > >> + } > > >> + } else { > > >> + for (i = 0; mc_ierror_table[i].srr1_mask; i++) { > > >> + if ((env->spr[SPR_SRR1] & mc_ierror_table[i].srr1_mask) != > > >> + mc_ierror_table[i].srr1_value) { > > >> + continue; > > >> + } > > >> + > > >> + ext_elog->mc.error_type = mc_ierror_table[i].error_type; > > >> + ext_elog->mc.sub_err_type = > > >> mc_ierror_table[i].error_subtype; > > >> + if (mc_ierror_table[i].nip_valid) { > > >> + ext_elog->mc.effective_address = cpu_to_be64(env->nip); > > >> + } > > >> + > > >> + summary |= mc_ierror_table[i].initiator > > >> + | mc_ierror_table[i].severity; > > >> + > > >> + return summary; > > >> + } > > >> + } > > >> + > > >> + summary |= RTAS_LOG_INITIATOR_CPU; > > >> + return summary; > > >> +} > > >> + > > >> +static void spapr_mce_dispatch_elog(PowerPCCPU *cpu, bool recovered) > > >> +{ > > >> + SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); > > >> + CPUState *cs = CPU(cpu); > > >> + uint64_t rtas_addr; > > >> + CPUPPCState *env = &cpu->env; > > >> + PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu); > > >> + target_ulong r3, msr = 0; > > >> + struct rtas_error_log log; > > >> + struct mc_extended_log *ext_elog; > > >> + uint32_t summary; > > >> + > > >> + /* > > >> + * Properly set bits in MSR before we invoke the handler. 
> > >> + * SRR0/1, DAR and DSISR are properly set by KVM > > >> + */ > > >> + if (!(*pcc->interrupts_big_endian)(cpu)) { > > >> + msr |= (1ULL << MSR_LE); > > >> + } > > >> + > > >> + if (env->msr & (1ULL << MSR_SF)) { > > >> + msr |= (1ULL << MSR_SF); > > >> + } > > >> + > > >> + msr |= (1ULL << MSR_ME); > > >> + > > >> + if (spapr->guest_machine_check_addr == -1) { > > >> + /* > > >> + * This implies that we have hit a machine check between system > > >> + * reset and "ibm,nmi-register". Fall back to the old machine > > >> + * check behavior in such cases. > > >> + */ > > >> + env->spr[SPR_SRR0] = env->nip; > > >> + env->spr[SPR_SRR1] = env->msr; > > >> + env->msr = msr; > > >> + env->nip = 0x200; > > >> + return; > > >> + } > > >> + > > >> + ext_elog = g_malloc0(sizeof(*ext_elog)); > > >> + summary = spapr_mce_get_elog_type(cpu, recovered, ext_elog); > > >> + > > >> + log.summary = cpu_to_be32(summary); > > >> + log.extended_length = cpu_to_be32(sizeof(*ext_elog)); > > >> + > > >> + /* r3 should be in BE always */ > > >> + r3 = cpu_to_be64(env->gpr[3]); > > >> + env->msr = msr; > > >> + > > >> + spapr_init_v6hdr(&ext_elog->v6hdr); > > >> + ext_elog->mc.hdr.section_id = > > >> cpu_to_be16(RTAS_LOG_V6_SECTION_ID_MC); > > >> + ext_elog->mc.hdr.section_length = > > >> + cpu_to_be16(sizeof(struct rtas_event_log_v6_mc)); > > >> + ext_elog->mc.hdr.section_version = 1; > > >> + > > >> + /* get rtas addr from fdt */ > > >> + rtas_addr = spapr_get_rtas_addr(); > > >> + if (!rtas_addr) { > > >> + /* Unable to fetch rtas_addr. 
Hence reset the guest */ > > >> + ppc_cpu_do_system_reset(cs); > > >> + } > > >> + > > >> + cpu_physical_memory_write(rtas_addr + RTAS_ERROR_LOG_OFFSET, &r3, > > >> + sizeof(r3)); > > >> + cpu_physical_memory_write(rtas_addr + RTAS_ERROR_LOG_OFFSET + > > >> sizeof(r3), > > >> + &log, sizeof(log)); > > >> + cpu_physical_memory_write(rtas_addr + RTAS_ERROR_LOG_OFFSET + > > >> sizeof(r3) + > > >> + sizeof(log), ext_elog, > > >> + sizeof(*ext_elog)); > > >> + > > >> + env->gpr[3] = rtas_addr + RTAS_ERROR_LOG_OFFSET; > > >> + env->nip = spapr->guest_machine_check_addr; > > >> + > > >> + g_free(ext_elog); > > >> +} > > >> + > > >> void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered) > > >> { > > >> SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); > > >> @@ -641,6 +873,10 @@ void spapr_mce_req_event(PowerPCCPU *cpu, bool > > >> recovered) > > >> } > > >> } > > >> spapr->mc_status = cpu->vcpu_id; > > >> + > > >> + spapr_mce_dispatch_elog(cpu, recovered); > > >> + > > >> + return; > > > > > > Drop the last two lines. > > > > ok. 
> > > > > > > >> } > > >> > > >> static void check_exception(PowerPCCPU *cpu, SpaprMachineState *spapr, > > >> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h > > >> index fc3a776..c717ab2 100644 > > >> --- a/include/hw/ppc/spapr.h > > >> +++ b/include/hw/ppc/spapr.h > > >> @@ -710,6 +710,9 @@ void spapr_load_rtas(SpaprMachineState *spapr, void > > >> *fdt, hwaddr addr); > > >> > > >> #define RTAS_ERROR_LOG_MAX 2048 > > >> > > >> +/* Offset from rtas-base where error log is placed */ > > >> +#define RTAS_ERROR_LOG_OFFSET 0x30 > > >> + > > >> #define RTAS_EVENT_SCAN_RATE 1 > > >> > > >> /* This helper should be used to encode interrupt specifiers when the > > >> related > > >> @@ -799,6 +802,7 @@ int spapr_max_server_number(SpaprMachineState > > >> *spapr); > > >> void spapr_store_hpte(PowerPCCPU *cpu, hwaddr ptex, > > >> uint64_t pte0, uint64_t pte1); > > >> void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered); > > >> +ssize_t spapr_get_rtas_size(ssize_t old_rtas_sizea); > > >> > > > > > > Looks like a leftover. > > > > ah.. yes. > > > > > > > >> /* DRC callbacks. */ > > >> void spapr_core_release(DeviceState *dev); > > >> > > > > > > -- David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/~dgibson
signature.asc
Description: PGP signature