>-----Original Message-----
>From: Eric Auger <[email protected]>
>Subject: Re: [PATCH rfcv1 3/6] intel_iommu: add set/unset_iommu_device
>callback
>
>Hi Zhenzhong,
>
>On 1/15/24 11:13, Zhenzhong Duan wrote:
>> From: Yi Liu <[email protected]>
>>
>> This adds set/unset_iommu_device() implementation in Intel vIOMMU.
>> In set call, IOMMUFDDevice is recorded in hash table indexed by
>> PCI BDF.
>>
>> Signed-off-by: Yi Liu <[email protected]>
>> Signed-off-by: Yi Sun <[email protected]>
>> Signed-off-by: Zhenzhong Duan <[email protected]>
>> ---
>> include/hw/i386/intel_iommu.h | 10 +++++
>> hw/i386/intel_iommu.c | 79
>+++++++++++++++++++++++++++++++++++
>> 2 files changed, 89 insertions(+)
>>
>> diff --git a/include/hw/i386/intel_iommu.h
>b/include/hw/i386/intel_iommu.h
>> index 7fa0a695c8..c65fdde56f 100644
>> --- a/include/hw/i386/intel_iommu.h
>> +++ b/include/hw/i386/intel_iommu.h
>> @@ -62,6 +62,7 @@ typedef union VTD_IR_TableEntry VTD_IR_TableEntry;
>> typedef union VTD_IR_MSIAddress VTD_IR_MSIAddress;
>> typedef struct VTDPASIDDirEntry VTDPASIDDirEntry;
>> typedef struct VTDPASIDEntry VTDPASIDEntry;
>> +typedef struct VTDIOMMUFDDevice VTDIOMMUFDDevice;
>>
>> /* Context-Entry */
>> struct VTDContextEntry {
>> @@ -148,6 +149,13 @@ struct VTDAddressSpace {
>> IOVATree *iova_tree;
>> };
>>
>> +struct VTDIOMMUFDDevice {
>> + PCIBus *bus;
>> + uint8_t devfn;
>> + IOMMUFDDevice *idev;
>> + IntelIOMMUState *iommu_state;
>> +};
>> +
>Just wondering whether we shouldn't reuse the VTDAddressSpace to store
>the idev, if any. How have you made your choice. What will it become
>when PASID gets added?
VTDAddressSpace is indexed by aliased BDF, but VTDIOMMUFDDevice is indexed
by device's BDF. So we can't just store VTDIOMMUFDDevice as a pointer in
VTDAddressSpace, may need a list in case more than one device in same address
space. Then a global VTDIOMMUFDDevice list is better for lookup.
For PASID in modern mode which support stage-1 page table, we have
VTDPASIDAddressSpace indexed by device's BDF+PASID, We didn't use
VTDAddressSpace which is for stage-2 page table.
Thanks
Zhenzhong
>> struct VTDIOTLBEntry {
>> uint64_t gfn;
>> uint16_t domain_id;
>> @@ -292,6 +300,8 @@ struct IntelIOMMUState {
>> /* list of registered notifiers */
>> QLIST_HEAD(, VTDAddressSpace) vtd_as_with_notifiers;
>>
>> + GHashTable *vtd_iommufd_dev; /* VTDIOMMUFDDevice */
>> +
>> /* interrupt remapping */
>> bool intr_enabled; /* Whether guest enabled IR */
>> dma_addr_t intr_root; /* Interrupt remapping table pointer */
>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>> index ed5677c0ae..95faf697eb 100644
>> --- a/hw/i386/intel_iommu.c
>> +++ b/hw/i386/intel_iommu.c
>> @@ -237,6 +237,13 @@ static gboolean vtd_as_equal(gconstpointer v1,
>gconstpointer v2)
>> (key1->pasid == key2->pasid);
>> }
>>
>> +static gboolean vtd_as_idev_equal(gconstpointer v1, gconstpointer v2)
>> +{
>> + const struct vtd_as_key *key1 = v1;
>> + const struct vtd_as_key *key2 = v2;
>> +
>> + return (key1->bus == key2->bus) && (key1->devfn == key2->devfn);
>> +}
>> /*
>> * Note that we use pointer to PCIBus as the key, so hashing/shifting
>> * based on the pointer value is intended. Note that we deal with
>> @@ -3812,6 +3819,74 @@ VTDAddressSpace
>*vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus,
>> return vtd_dev_as;
>> }
>>
>> +static int vtd_dev_set_iommu_device(PCIBus *bus, void *opaque,
>int32_t devfn,
>> + IOMMUFDDevice *idev, Error **errp)
>> +{
>> + IntelIOMMUState *s = opaque;
>> + VTDIOMMUFDDevice *vtd_idev;
>> + struct vtd_as_key key = {
>> + .bus = bus,
>> + .devfn = devfn,
>> + };
>> + struct vtd_as_key *new_key;
>> +
>> + assert(0 <= devfn && devfn < PCI_DEVFN_MAX);
>> +
>> + /* None IOMMUFD case */
>> + if (!idev) {
>> + return 0;
>> + }
>> +
>> + vtd_iommu_lock(s);
>> +
>> + vtd_idev = g_hash_table_lookup(s->vtd_iommufd_dev, &key);
>> +
>> + if (vtd_idev) {
>> + error_setg(errp, "IOMMUFD device already exist");
>> + return -1;
>> + }
>> +
>> + new_key = g_malloc(sizeof(*new_key));
>> + new_key->bus = bus;
>> + new_key->devfn = devfn;
>> +
>> + vtd_idev = g_malloc0(sizeof(VTDIOMMUFDDevice));
>> + vtd_idev->bus = bus;
>> + vtd_idev->devfn = (uint8_t)devfn;
>> + vtd_idev->iommu_state = s;
>> + vtd_idev->idev = idev;
>> +
>> + g_hash_table_insert(s->vtd_iommufd_dev, new_key, vtd_idev);
>> +
>> + vtd_iommu_unlock(s);
>> +
>> + return 0;
>> +}
>> +
>> +static void vtd_dev_unset_iommu_device(PCIBus *bus, void *opaque,
>int32_t devfn)
>> +{
>> + IntelIOMMUState *s = opaque;
>> + VTDIOMMUFDDevice *vtd_idev;
>> + struct vtd_as_key key = {
>> + .bus = bus,
>> + .devfn = devfn,
>> + };
>> +
>> + assert(0 <= devfn && devfn < PCI_DEVFN_MAX);
>> +
>> + vtd_iommu_lock(s);
>> +
>> + vtd_idev = g_hash_table_lookup(s->vtd_iommufd_dev, &key);
>> + if (!vtd_idev) {
>> + vtd_iommu_unlock(s);
>> + return;
>> + }
>> +
>> + g_hash_table_remove(s->vtd_iommufd_dev, &key);
>> +
>> + vtd_iommu_unlock(s);
>> +}
>> +
>> /* Unmap the whole range in the notifier's scope. */
>> static void vtd_address_space_unmap(VTDAddressSpace *as,
>IOMMUNotifier *n)
>> {
>> @@ -4107,6 +4182,8 @@ static AddressSpace
>*vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
>>
>> static PCIIOMMUOps vtd_iommu_ops = {
>> .get_address_space = vtd_host_dma_iommu,
>> + .set_iommu_device = vtd_dev_set_iommu_device,
>> + .unset_iommu_device = vtd_dev_unset_iommu_device,
>> };
>>
>> static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
>> @@ -4230,6 +4307,8 @@ static void vtd_realize(DeviceState *dev, Error
>**errp)
>> g_free, g_free);
>> s->vtd_address_spaces = g_hash_table_new_full(vtd_as_hash,
>vtd_as_equal,
>> g_free, g_free);
>> + s->vtd_iommufd_dev = g_hash_table_new_full(vtd_as_hash,
>vtd_as_idev_equal,
>> + g_free, g_free);
>> vtd_init(s);
>> pci_setup_iommu(bus, &vtd_iommu_ops, dev);
>> /* Pseudo address space under root PCI bus. */
>Thanks
>
>Eric