diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index c734472721..36d8fbe872 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -24,6 +24,7 @@
#include <sys/ioctl.h>
#include "hw/core/hw-error.h"
+#include "hw/core/iommu.h"
#include "hw/pci/msi.h"
#include "hw/pci/msix.h"
#include "hw/pci/pci_bridge.h"
@@ -2498,9 +2499,62 @@ static int vfio_setup_rebar_ecap(VFIOPCIDevice *vdev,
uint16_t pos)
return 0;
}
+/*
+ * Try to retrieve PASID capability information via IOMMUFD APIs and,
+ * if supported, synthesize a PASID PCIe extended capability for the
+ * VFIO device.
+ *
+ * A capability is only created when the device has a host IOMMU device
+ * whose class implements get_pasid_info(), that callback succeeds, and
+ * the vIOMMU flags include VIOMMU_FLAG_PASID_SUPPORTED. In every other
+ * case the function does nothing and reports success.
+ *
+ * Use user-specified PASID capability offset if provided, otherwise
+ * place it at the end of the PCIe extended configuration space. A
+ * user-specified offset must be 4-byte aligned and leave room for the
+ * whole capability inside the extended configuration space; this is
+ * checked here so that a bad value is reported through @errp.
+ *
+ * @vdev: VFIO PCI device to add the capability to
+ * @errp: set when the capability cannot be placed
+ *
+ * Returns: true if the capability was added or is not applicable,
+ *          false (with @errp set) on failure.
+ */
+static bool vfio_pci_synthesize_pasid_cap(VFIOPCIDevice *vdev, Error **errp)
+{
+    HostIOMMUDevice *hiod = vdev->vbasedev.hiod;
+    HostIOMMUDeviceClass *hiodc;
+    PasidInfo pasid_info;
+    PCIDevice *pdev = PCI_DEVICE(vdev);
+    uint16_t pasid_offset;
+
+    /* Without a host IOMMU device there is nothing to query; not an error. */
+    if (!hiod) {
+        return true;
+    }
+
+    /*
+     * Silently skip when PASID info is unavailable from the host side or
+     * the vIOMMU does not advertise PASID support.
+     */
+    hiodc = HOST_IOMMU_DEVICE_GET_CLASS(hiod);
+    if (!hiodc || !hiodc->get_pasid_info ||
+        !hiodc->get_pasid_info(hiod, &pasid_info) ||
+        !(pci_device_get_viommu_flags(pdev) & VIOMMU_FLAG_PASID_SUPPORTED)) {
+        return true;
+    }
+
+    /* Use user-specified offset if set, otherwise place PASID at the end. */
+    if (vdev->vpasid_cap_offset) {
+        pasid_offset = vdev->vpasid_cap_offset;
+
+        /*
+         * The offset comes straight from the command line: reject values
+         * that are not dword aligned or that would put any part of the
+         * capability outside the PCIe extended configuration space.
+         */
+        if ((pasid_offset % 4) != 0 ||
+            pasid_offset < PCI_CONFIG_SPACE_SIZE ||
+            pasid_offset > PCIE_CONFIG_SPACE_SIZE - PCI_EXT_CAP_PASID_SIZEOF) {
+            error_setg(errp, "vfio: Invalid PASID capability offset 0x%x: "
+                       "must be 4-byte aligned and within the PCIe extended "
+                       "configuration space", pasid_offset);
+            return false;
+        }
+    } else {
+        pasid_offset = PCIE_CONFIG_SPACE_SIZE - PCI_EXT_CAP_PASID_SIZEOF;
+    }
+
+    if (!pcie_insert_capability(pdev, PCI_EXT_CAP_ID_PASID, PCI_PASID_VER,
+                                pasid_offset, PCI_EXT_CAP_PASID_SIZEOF)) {
+        error_setg(errp,
+                   "vfio: Placing PASID capability at offset 0x%x failed",
+                   pasid_offset);
+        return false;
+    }
+    trace_vfio_pci_synthesize_pasid_cap(vdev->vbasedev.name, pasid_offset);
+
+    pcie_pasid_common_init(pdev, pasid_offset, pasid_info.max_pasid_log2,
+                           pasid_info.exec_perm, pasid_info.priv_mod);
+
+    /* PASID capability is fully emulated by QEMU */
+    memset(vdev->emulated_config_bits + pdev->exp.pasid_cap, 0xff,
+           PCI_EXT_CAP_PASID_SIZEOF);
+    return true;
+}
+
static void vfio_add_ext_cap(VFIOPCIDevice *vdev)
{
PCIDevice *pdev = PCI_DEVICE(vdev);
+ bool pasid_cap_added = false;
+ Error *err = NULL;
uint32_t header;
uint16_t cap_id, next, size;
uint8_t cap_ver;
@@ -2578,12 +2632,24 @@ static void vfio_add_ext_cap(VFIOPCIDevice *vdev)
pcie_add_capability(pdev, cap_id, cap_ver, next, size);
}
break;
+ /*
+ * VFIO kernel does not expose the PASID CAP today. We may synthesize
+ * one later through IOMMUFD APIs. If VFIO ever starts exposing it,
+ * record its presence here so we do not create a duplicate CAP.
+ */
+ case PCI_EXT_CAP_ID_PASID:
+ pasid_cap_added = true;
+ /* fallthrough */
default:
pcie_add_capability(pdev, cap_id, cap_ver, next, size);
}
}
+ if (!pasid_cap_added && !vfio_pci_synthesize_pasid_cap(vdev, &err)) {
+ error_report_err(err);
+ }
+
/* Cleanup chain head ID if necessary */
if (pci_get_word(pdev->config + PCI_CONFIG_SPACE_SIZE) == 0xFFFF) {
pci_set_word(pdev->config + PCI_CONFIG_SPACE_SIZE, 0);
@@ -3756,6 +3822,8 @@ static const Property vfio_pci_properties[] = {
TYPE_IOMMUFD_BACKEND, IOMMUFDBackend *),
#endif
DEFINE_PROP_BOOL("skip-vsc-check", VFIOPCIDevice, skip_vsc_check, true),
+ DEFINE_PROP_UINT16("x-vpasid-cap-offset", VFIOPCIDevice,
+ vpasid_cap_offset, 0),
};
#ifdef CONFIG_IOMMUFD
@@ -3913,6 +3981,13 @@ static void vfio_pci_class_init(ObjectClass *klass,
const void *data)
"destination when doing live "
"migration of device state via "
"multifd channels");
+ object_class_property_set_description(klass, /* 11.0 */
+ "x-vpasid-cap-offset",
+ "PCIe extended configuration space offset
at which to place a "
+ "synthetic PASID extended capability when
PASID is enabled via "
+ "a vIOMMU. A value of 0 (default) places
the capability at the "
+ "end of the extended configuration space.
The offset must be "
+ "4-byte aligned and within the PCIe
extended configuration space");
}
static const TypeInfo vfio_pci_info = {
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index 0f78cf9cdb..d6495d7f29 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -187,6 +187,7 @@ struct VFIOPCIDevice {
bool defer_kvm_irq_routing;
bool clear_parent_atomics_on_exit;
bool skip_vsc_check;
+ uint16_t vpasid_cap_offset;
VFIODisplay *dpy;
Notifier irqchip_change_notifier;
VFIOPCICPR cpr;
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
index 180e3d526b..b48c4abe7a 100644
--- a/hw/vfio/trace-events
+++ b/hw/vfio/trace-events
@@ -40,6 +40,7 @@ vfio_pci_hot_reset_result(const char *name, const char *result)
"%s hot reset: %
vfio_pci_populate_device_config(const char *name, unsigned long size, unsigned long offset, unsigned long flags) "Device '%s' config: size: 0x%lx, offset: 0x%lx, flags: 0x%lx"
vfio_pci_populate_device_get_irq_info_failure(const char *errstr) "VFIO_DEVICE_GET_IRQ_INFO failure: %s"
vfio_mdev(const char *name, bool is_mdev) " (%s) is_mdev %d"
+vfio_pci_synthesize_pasid_cap(const char *name, uint16_t offset) "%s offset: 0x%x"
vfio_add_ext_cap_dropped(const char *name, uint16_t cap, uint16_t offset) "%s 0x%x@0x%x"
vfio_pci_reset(const char *name) " (%s)"
vfio_pci_reset_flr(const char *name) "%s FLR/VFIO_DEVICE_RESET"
diff --git a/include/hw/core/iommu.h b/include/hw/core/iommu.h
index d5401a397b..86af315c15 100644
--- a/include/hw/core/iommu.h
+++ b/include/hw/core/iommu.h
@@ -20,6 +20,7 @@
enum viommu_flags {
/* vIOMMU needs nesting parent HWPT to create nested HWPT */
VIOMMU_FLAG_WANT_NESTING_PARENT = BIT_ULL(0),
+ /* vIOMMU reports PASID support; checked before a PASID cap is synthesized */
+ VIOMMU_FLAG_PASID_SUPPORTED = BIT_ULL(1),
};
/* Host IOMMU quirks. Extracted from host IOMMU capabilities */