Add vfio_ensure_d0_state() to safely transition PCI devices from D3hot/D3cold
to D0 before QEMU guest access, preventing config space inaccessibility and
tg3 IRQ crashes during VFIO realize.

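Key changes:
- Probe the VFIO config region with VFIO_DEVICE_GET_REGION_INFO rather
  than reading PCI config space; a failed probe is treated as D3cold
- D3cold: write "on" to the device's sysfs power/control attribute to
  force a runtime resume, wait 10ms, then write "auto" back
- Note: this version performs no direct PMCSR write and no polling for
  the D0 transition; D3hot is not handled separately
- No-op when the config region probe succeeds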

Fixes PowerPC EEH races where devices enter low-power states during VFIO
handover, causing config space access failures.

Signed-off-by: Narayana Murty N <[email protected]>
---
 hw/vfio/pci.c | 52 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index c734472721..851cd789aa 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -3392,6 +3392,51 @@ bool vfio_pci_interrupt_setup(VFIOPCIDevice *vdev, Error **errp)
     return true;
 }
 
+/*
+ * Write the string @value to the sysfs attribute at @path.
+ *
+ * stdio buffers the data, so the kernel only sees the write once the
+ * stream is flushed. A value rejected by the attribute therefore shows
+ * up as a failure of fclose(), which is why its result is checked too.
+ *
+ * Returns 0 on success, -1 if the file could not be opened or the
+ * write (including the final flush) failed.
+ */
+static int write_sysfs(const char *path, const char *value)
+{
+    FILE *f = fopen(path, "w");
+    int ret = 0;
+
+    if (!f) {
+        return -1;
+    }
+    if (fputs(value, f) == EOF) {
+        ret = -1;
+    }
+    /* Always close the stream, even after a failed fputs() */
+    if (fclose(f) != 0) {
+        ret = -1;
+    }
+    return ret;
+}
+
+/*
+ * Best-effort attempt to wake the host device before its config space
+ * is used.
+ *
+ * The VFIO config region is probed with VFIO_DEVICE_GET_REGION_INFO
+ * (no PCI config reads are issued here). If the probe fails, the device
+ * is assumed to be in D3cold and a runtime resume is forced by writing
+ * "on" to its sysfs power/control attribute, waiting 10ms, and then
+ * writing "auto" back.
+ *
+ * Nothing is returned; failures are only reported as warnings.
+ *
+ * NOTE(review): a failing GET_REGION_INFO is taken as proof of D3cold,
+ * but the ioctl can fail for unrelated reasons - confirm the heuristic.
+ * NOTE(review): the previous power/control setting is not saved; it is
+ * unconditionally replaced by "auto", and the resulting power state is
+ * not verified.
+ */
+static void vfio_ensure_d0_state(VFIOPCIDevice *vdev)
+{
+    VFIODevice *vbasedev = &vdev->vbasedev;
+    g_autofree char *sysfs_power_path = NULL;
+    struct vfio_region_info reg_info = {
+        .argsz = sizeof(reg_info),
+        .index = VFIO_PCI_CONFIG_REGION_INDEX,
+    };
+    int probe_errno;
+
+    /*
+     * The caller invokes this before it has validated vbasedev->fd.
+     * Without an open device fd the probe could only fail with EBADF,
+     * which says nothing about the power state, and without a name no
+     * sysfs path can be built.
+     */
+    if (vbasedev->fd < 0 || !vbasedev->name) {
+        return;
+    }
+
+    if (ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, &reg_info) >= 0) {
+        return;
+    }
+    /* Save errno before any further library call can overwrite it */
+    probe_errno = errno;
+    warn_report("vfio: %s config region probe failed (D3cold): %s",
+                vbasedev->name, strerror(probe_errno));
+
+    /* D3cold assumed: fall back to sysfs runtime PM control */
+    sysfs_power_path =
+        g_strdup_printf("/sys/bus/pci/devices/%s/power/control",
+                        vbasedev->name);
+
+    /* Force runtime resume */
+    if (write_sysfs(sysfs_power_path, "on") != 0) {
+        warn_report("vfio: %s failed to write \"on\" to %s",
+                    vbasedev->name, sysfs_power_path);
+        return;
+    }
+
+    g_usleep(10000);  /* 10ms settle */
+
+    if (write_sysfs(sysfs_power_path, "auto") != 0) {
+        warn_report("vfio: %s failed to write \"auto\" to %s",
+                    vbasedev->name, sysfs_power_path);
+    }
+    info_report("vfio: %s D3cold → D0 via sysfs", vbasedev->name);
+}
+
 static void vfio_pci_realize(PCIDevice *pdev, Error **errp)
 {
     ERRP_GUARD();
@@ -3401,6 +3446,13 @@ static void vfio_pci_realize(PCIDevice *pdev, Error **errp)
     char uuid[UUID_STR_LEN];
     g_autofree char *name = NULL;
 
+    /*
+     * Make sure the PCI device is in the D0 power state,
+     * waking it up if it is not, before accessing the
+     * config space.
+     */
+    vfio_ensure_d0_state(vdev);
+
     if (vbasedev->fd < 0 && !vbasedev->sysfsdev) {
         if (!(~vdev->host.domain || ~vdev->host.bus ||
               ~vdev->host.slot || ~vdev->host.function)) {
-- 
2.51.1

Reply via email to