On 2/9/26 14:40, Teddy Astie wrote:
> Hello,
>
> Some comments, mostly code style, nothing functionnal.
>
> Le 09/02/2026 à 12:36, Julian Vetter a écrit :
>> x2APIC guests with more than 128 vCPUs have APIC IDs above 255, but MSI
>> addresses and IO-APIC RTEs only provide an 8-bit destination field.
>> Without extended destination ID support, Linux limits the maximum usable
>> APIC ID to 255, refusing to bring up vCPUs beyond that limit. So,
>> advertise XEN_HVM_CPUID_EXT_DEST_ID in the HVM hypervisor CPUID leaf,
>> signalling that guests may use MSI address bits 11:5 and IO-APIC RTE
>> bits 55:49 as additional high destination ID bits. This expands the
>> destination ID from 8 to 15 bits.
>>
>> Signed-off-by: Julian Vetter <[email protected]>
>> ---
>>    xen/arch/x86/cpuid.c                   |  9 +++++++++
>>    xen/arch/x86/hvm/irq.c                 | 11 ++++++++++-
>>    xen/arch/x86/hvm/vioapic.c             |  2 +-
>>    xen/arch/x86/hvm/vmsi.c                |  4 ++--
>>    xen/arch/x86/include/asm/hvm/hvm.h     |  4 ++--
>>    xen/arch/x86/include/asm/hvm/vioapic.h | 13 +++++++++++++
>>    xen/arch/x86/include/asm/msi.h         |  3 +++
>>    7 files changed, 40 insertions(+), 6 deletions(-)
>>
>> diff --git a/xen/arch/x86/cpuid.c b/xen/arch/x86/cpuid.c
>> index d85be20d86..fb17c71d74 100644
>> --- a/xen/arch/x86/cpuid.c
>> +++ b/xen/arch/x86/cpuid.c
>> @@ -148,6 +148,15 @@ static void cpuid_hypervisor_leaves(const struct vcpu 
>> *v, uint32_t leaf,
>>            res->a |= XEN_HVM_CPUID_DOMID_PRESENT;
>>            res->c = d->domain_id;
>>
>> +        /*
>> +         * Advertise extended destination ID support. This allows guests to 
>> use
>> +         * bits 11:5 of the MSI address and bits 55:49 of the IO-APIC RTE 
>> for
>> +         * additional destination ID bits, expanding the addressable APIC ID
>> +         * range from 8 to 15 bits. This is required for x2APIC guests with
>> +         * APIC IDs > 255.
>> +         */
>> +        res->a |= XEN_HVM_CPUID_EXT_DEST_ID;
>> +
>>            /*
>>             * Per-vCPU event channel upcalls are implemented and work
>>             * correctly with PIRQs routed over event channels.
>> diff --git a/xen/arch/x86/hvm/irq.c b/xen/arch/x86/hvm/irq.c
>> index 5f64361113..2cc14d37d4 100644
>> --- a/xen/arch/x86/hvm/irq.c
>> +++ b/xen/arch/x86/hvm/irq.c
>> @@ -374,7 +374,16 @@ int hvm_set_pci_link_route(struct domain *d, u8 link, 
>> u8 isa_irq)
>>    int hvm_inject_msi(struct domain *d, uint64_t addr, uint32_t data)
>>    {
>>        uint32_t tmp = (uint32_t) addr;
>> -    uint8_t  dest = (tmp & MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT;
>> +    /*
>> +     * Standard MSI destination: address bits 19:12 (8 bits).
>> +     * Extended MSI destination: address bits 11:5 (7 more bits).
>> +     * When XEN_HVM_CPUID_EXT_DEST_ID is advertised, the guest may use
>> +     * bits 11:5 for high destination ID bits, expanding to 15 bits total.
>
> As we always advertise XEN_HVM_CPUID_EXT_DEST_ID, I would rather say
>
>   > As XEN_HVM_CPUID_EXT_DEST_ID is advertised, ...
>
>> +     * For legacy guests these bits are 0, so this is backwards-compatible.
>
> "Guests unaware of this feature set these bits to 0, ..."
>
>> +     */
>> +    uint32_t dest =
>> +        (((tmp & MSI_ADDR_EXT_DEST_ID_MASK) >> MSI_ADDR_EXT_DEST_ID_SHIFT) 
>> << 8) |
>> +        ((tmp & MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT);
>
> I wonder if we should introduce a macro like you did for IO-APIC
> (VIOAPIC_RTE_DEST).
>
>>        uint8_t  dest_mode = !!(tmp & MSI_ADDR_DESTMODE_MASK);
>>        uint8_t  delivery_mode = (data & MSI_DATA_DELIVERY_MODE_MASK)
>>            >> MSI_DATA_DELIVERY_MODE_SHIFT;
>> diff --git a/xen/arch/x86/hvm/vioapic.c b/xen/arch/x86/hvm/vioapic.c
>> index 7c725f9e47..263b1bd5cb 100644
>> --- a/xen/arch/x86/hvm/vioapic.c
>> +++ b/xen/arch/x86/hvm/vioapic.c
>> @@ -411,7 +411,7 @@ static void ioapic_inj_irq(
>>
>>    static void vioapic_deliver(struct hvm_vioapic *vioapic, unsigned int pin)
>>    {
>> -    uint16_t dest = vioapic->redirtbl[pin].fields.dest_id;
>> +    uint32_t dest = VIOAPIC_RTE_DEST(vioapic->redirtbl[pin].bits);
>
> I would rather introduce a new field in vioapic_redir_entry for the
> extended dest part; and compute dest from that and dest_id.

Here I have a question. This struct is a public ABI struct.
vioapic_redir_entry is defined in
xen/include/public/arch-x86/hvm/save.h. It's part of XENs VM
save/restore operation. It is used by libxc and toolstacks to migrate
VMs between hosts. Changing the struct might be undesirable? Yes, it
would make the code cleaner. having a bit entry for the extended dest
bits. What's the general opinion on this? With the VIOAPIC_RTE_DEST
macro I avoided touching this struct...

>
>>        uint8_t dest_mode = vioapic->redirtbl[pin].fields.dest_mode;
>>        uint8_t delivery_mode = vioapic->redirtbl[pin].fields.delivery_mode;
>>        uint8_t vector = vioapic->redirtbl[pin].fields.vector;
>> diff --git a/xen/arch/x86/hvm/vmsi.c b/xen/arch/x86/hvm/vmsi.c
>> index 27b1f089e2..dca191b4f1 100644
>> --- a/xen/arch/x86/hvm/vmsi.c
>> +++ b/xen/arch/x86/hvm/vmsi.c
>> @@ -66,7 +66,7 @@ static void vmsi_inj_irq(
>>
>>    int vmsi_deliver(
>>        struct domain *d, int vector,
>> -    uint8_t dest, uint8_t dest_mode,
>> +    uint32_t dest, uint8_t dest_mode,
>>        uint8_t delivery_mode, uint8_t trig_mode)
>>    {
>>        struct vlapic *target;
>> @@ -125,7 +125,7 @@ void vmsi_deliver_pirq(struct domain *d, const struct 
>> hvm_pirq_dpci *pirq_dpci)
>>    }
>>
>>    /* Return value, -1 : multi-dests, non-negative value: dest_vcpu_id */
>> -int hvm_girq_dest_2_vcpu_id(struct domain *d, uint8_t dest, uint8_t 
>> dest_mode)
>> +int hvm_girq_dest_2_vcpu_id(struct domain *d, uint32_t dest, uint8_t 
>> dest_mode)
>>    {
>>        int dest_vcpu_id = -1, w = 0;
>>        struct vcpu *v;
>> diff --git a/xen/arch/x86/include/asm/hvm/hvm.h 
>> b/xen/arch/x86/include/asm/hvm/hvm.h
>> index 7d9774df59..11256d5e67 100644
>> --- a/xen/arch/x86/include/asm/hvm/hvm.h
>> +++ b/xen/arch/x86/include/asm/hvm/hvm.h
>> @@ -295,11 +295,11 @@ uint64_t hvm_get_guest_time_fixed(const struct vcpu 
>> *v, uint64_t at_tsc);
>>
>>    int vmsi_deliver(
>>        struct domain *d, int vector,
>> -    uint8_t dest, uint8_t dest_mode,
>> +    uint32_t dest, uint8_t dest_mode,
>>        uint8_t delivery_mode, uint8_t trig_mode);
>>    struct hvm_pirq_dpci;
>>    void vmsi_deliver_pirq(struct domain *d, const struct hvm_pirq_dpci 
>> *pirq_dpci);
>> -int hvm_girq_dest_2_vcpu_id(struct domain *d, uint8_t dest, uint8_t 
>> dest_mode);
>> +int hvm_girq_dest_2_vcpu_id(struct domain *d, uint32_t dest, uint8_t 
>> dest_mode);
>>
>>    enum hvm_intblk
>>    hvm_interrupt_blocked(struct vcpu *v, struct hvm_intack intack);
>> diff --git a/xen/arch/x86/include/asm/hvm/vioapic.h 
>> b/xen/arch/x86/include/asm/hvm/vioapic.h
>> index 68af6dce79..b49eb348d5 100644
>> --- a/xen/arch/x86/include/asm/hvm/vioapic.h
>> +++ b/xen/arch/x86/include/asm/hvm/vioapic.h
>> @@ -32,6 +32,19 @@
>>    #define VIOAPIC_EDGE_TRIG  0
>>    #define VIOAPIC_LEVEL_TRIG 1
>>
>> +/*
>> + * Extract the destination ID from a 64-bit IO-APIC RTE, including the
>> + * extended bits (55:49) used when XEN_HVM_CPUID_EXT_DEST_ID is advertised.
>> + */
>> +#define IO_APIC_REDIR_DEST_SHIFT        56
>> +#define IO_APIC_REDIR_DEST_MASK         0xffULL
>> +#define IO_APIC_REDIR_EXT_DEST_SHIFT    49
>> +#define IO_APIC_REDIR_EXT_DEST_MASK     0x7fULL
>> +
>> +#define VIOAPIC_RTE_DEST(rte) \
>> +    ((((rte) >> IO_APIC_REDIR_DEST_SHIFT) & IO_APIC_REDIR_DEST_MASK) | \
>> +     (((rte) >> IO_APIC_REDIR_EXT_DEST_SHIFT) & 
>> IO_APIC_REDIR_EXT_DEST_MASK) << 8)
>> +
>>    #define VIOAPIC_DEFAULT_BASE_ADDRESS  0xfec00000U
>>    #define VIOAPIC_MEM_LENGTH            0x100
>>
>> diff --git a/xen/arch/x86/include/asm/msi.h b/xen/arch/x86/include/asm/msi.h
>> index 00059d4a3a..b7a132e5b5 100644
>> --- a/xen/arch/x86/include/asm/msi.h
>> +++ b/xen/arch/x86/include/asm/msi.h
>> @@ -54,6 +54,9 @@
>>    #define    MSI_ADDR_DEST_ID_MASK          0x00ff000
>>    #define  MSI_ADDR_DEST_ID(dest)           (((dest) << 
>> MSI_ADDR_DEST_ID_SHIFT) & MSI_ADDR_DEST_ID_MASK)
>>
>> +#define MSI_ADDR_EXT_DEST_ID_SHIFT  5
>> +#define MSI_ADDR_EXT_DEST_ID_MASK   0x0000fe0
>> +
>>    /* MAX fixed pages reserved for mapping MSIX tables. */
>>    #define FIX_MSIX_MAX_PAGES              512
>>
>
>
>
> --
> Teddy Astie | Vates XCP-ng Developer
>
> XCP-ng & Xen Orchestra - Vates solutions
>
> web: https://vates.tech



--
Julian Vetter | Vates Hypervisor & Kernel Developer

XCP-ng & Xen Orchestra - Vates solutions

web: https://vates.tech



Reply via email to