On 11/30/20 5:52 PM, Greg Kurz wrote: > The sPAPR XIVE device has an internal ENDT table the size of > which is configurable by the machine. This table is supposed > to contain END structures for all possible vCPUs that may > enter the guest. The machine must also claim IPIs for all > possible vCPUs since this is expected by the guest. > > spapr_irq_init() takes care of that under the assumption that > spapr_max_vcpu_ids() returns the number of possible vCPUs. > This happens to be the case when the VSMT mode is set to match > the number of threads per core in the guest (default behavior). > With non-default VSMT settings, this limit is > to the number > of vCPUs. In the worst case, we can end up allocating an 8 times > bigger ENDT and claiming 8 times more IPIs than needed. But more > importantly, this creates a confusion between number of vCPUs and > vCPU ids, which can lead to subtle bugs like [1]. > > Use smp.max_cpus instead of spapr_max_vcpu_ids() in > spapr_irq_init() for the latest machine type. Older machine > types continue to use spapr_max_vcpu_ids() since the size of > the ENDT is migration visible. > > [1] https://bugs.launchpad.net/qemu/+bug/1900241 > > Signed-off-by: Greg Kurz <gr...@kaod.org>
Reviewed-by: Cédric Le Goater <c...@kaod.org> Thanks, C. > --- > include/hw/ppc/spapr.h | 1 + > hw/ppc/spapr.c | 3 +++ > hw/ppc/spapr_irq.c | 16 +++++++++++++--- > 3 files changed, 17 insertions(+), 3 deletions(-) > > diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h > index dc99d45e2852..95bf210d0bf6 100644 > --- a/include/hw/ppc/spapr.h > +++ b/include/hw/ppc/spapr.h > @@ -139,6 +139,7 @@ struct SpaprMachineClass { > hwaddr rma_limit; /* clamp the RMA to this size */ > bool pre_5_1_assoc_refpoints; > bool pre_5_2_numa_associativity; > + bool pre_6_0_xive_max_cpus; > > bool (*phb_placement)(SpaprMachineState *spapr, uint32_t index, > uint64_t *buid, hwaddr *pio, > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c > index ab59bfe941d0..227a926dfd48 100644 > --- a/hw/ppc/spapr.c > +++ b/hw/ppc/spapr.c > @@ -4530,8 +4530,11 @@ DEFINE_SPAPR_MACHINE(6_0, "6.0", true); > */ > static void spapr_machine_5_2_class_options(MachineClass *mc) > { > + SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); > + > spapr_machine_6_0_class_options(mc); > compat_props_add(mc->compat_props, hw_compat_5_2, hw_compat_5_2_len); > + smc->pre_6_0_xive_max_cpus = true; > } > > DEFINE_SPAPR_MACHINE(5_2, "5.2", false); > diff --git a/hw/ppc/spapr_irq.c b/hw/ppc/spapr_irq.c > index 552e30e93036..4d9ecd5d0af8 100644 > --- a/hw/ppc/spapr_irq.c > +++ b/hw/ppc/spapr_irq.c > @@ -324,17 +324,27 @@ void spapr_irq_init(SpaprMachineState *spapr, Error > **errp) > } > > if (spapr->irq->xive) { > - uint32_t max_vcpu_ids = spapr_max_vcpu_ids(spapr); > + uint32_t max_cpus = MACHINE(spapr)->smp.max_cpus; > DeviceState *dev; > int i; > > + /* > + * Older machine types used to size the ENDT and IPI range > + * according to the upper limit of vCPU ids, which can be > + * higher than smp.max_cpus with custom VSMT settings. Keep > + * the previous behavior for compatibility with such setups. > + */ > + if (smc->pre_6_0_xive_max_cpus) { > + max_cpus = spapr_max_vcpu_ids(spapr); > + } > + > dev = qdev_new(TYPE_SPAPR_XIVE); > qdev_prop_set_uint32(dev, "nr-irqs", smc->nr_xirqs + > SPAPR_XIRQ_BASE); > /* > * 8 XIVE END structures per CPU. One for each available > * priority > */ > - qdev_prop_set_uint32(dev, "nr-ends", max_vcpu_ids << 3); > + qdev_prop_set_uint32(dev, "nr-ends", max_cpus << 3); > object_property_set_link(OBJECT(dev), "xive-fabric", OBJECT(spapr), > &error_abort); > sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); > @@ -342,7 +352,7 @@ void spapr_irq_init(SpaprMachineState *spapr, Error > **errp) > spapr->xive = SPAPR_XIVE(dev); > > /* Enable the CPU IPIs */ > - for (i = 0; i < max_vcpu_ids; ++i) { > + for (i = 0; i < max_cpus; ++i) { > SpaprInterruptControllerClass *sicc > = SPAPR_INTC_GET_CLASS(spapr->xive); > >