@Marks,

This is the testing kernel with the patch below applied:
https://people.canonical.com/~hwang4/lp2111521/test12/

Please add dyndbg="file drivers/pci/* +p" to the kernel command line,
boot the test kernel, and collect a dmesg.txt. Thanks.

BTW, I was out of office for the past week, so my response is a bit late.

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index e9448d55113b..c9abb18988b0 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -1268,6 +1268,8 @@ static int pci_dev_wait(struct pci_dev *dev, char *reset_type, int timeout)
        bool retrain = false;
        struct pci_dev *root, *bridge;
 
+       pci_dbg(dev, "%s: %s timeout %d\n", __func__, reset_type, timeout);
+
        root = pcie_find_root_port(dev);
 
        if (pci_is_pcie(dev)) {
@@ -1305,14 +1307,32 @@ static int pci_dev_wait(struct pci_dev *dev, char *reset_type, int timeout)
 
                if (root && root->config_rrs_sv) {
                        pci_read_config_dword(dev, PCI_VENDOR_ID, &id);
-                       if (!pci_bus_rrs_vendor_id(id))
-                               break;
+                       pci_dbg(dev, "%s: vf %d read %#06x\n", __func__,
+                               dev->is_virtfn, id);
+                       if (pci_bus_rrs_vendor_id(id))
+                               goto retry;
+
+                       /*
+                        * We might read 0xffff if the device is a VF and
+                        * the read was successful (the VF Vendor ID is
+                        * 0xffff per spec).
+                        *
+                        * If the device is not a VF, 0xffff likely means
+                        * there was an error on PCIe.  E.g., maybe the
+                        * device couldn't even respond with RRS status,
+                        * and the RC timed out and synthesized ~0 data.
+                        */
+                       if (PCI_POSSIBLE_ERROR(id) && !dev->is_virtfn)
+                                   goto retry;
+
+                       break;
                } else {
                        pci_read_config_dword(dev, PCI_COMMAND, &id);
                        if (!PCI_POSSIBLE_ERROR(id))
                                break;
                }
 
+retry:
                if (delay > timeout) {
                        pci_warn(dev, "not ready %dms after %s; giving up\n",
                                 delay - 1, reset_type);
@@ -4760,6 +4780,8 @@ static bool pcie_wait_for_link_delay(struct pci_dev *pdev, bool active,
         * Some controllers might not implement link active reporting. In this
         * case, we wait for 1000 ms + any delay requested by the caller.
         */
+       pci_dbg(pdev, "%s: active %d delay %d link_active_reporting %d\n",
+               __func__, active, delay, pdev->link_active_reporting);
        if (!pdev->link_active_reporting) {
                msleep(PCIE_LINK_RETRAIN_TIMEOUT_MS + delay);
                return true;
(END)

-- 
You received this bug notification because you are a member of Ubuntu
Bugs, which is subscribed to Ubuntu.
https://bugs.launchpad.net/bugs/2111521

Title:
  nvme no longer detected on boot after upgrade to 6.8.0-60

To manage notifications about this bug go to:
https://bugs.launchpad.net/ubuntu/+source/linux/+bug/2111521/+subscriptions


-- 
ubuntu-bugs mailing list
[email protected]
https://lists.ubuntu.com/mailman/listinfo/ubuntu-bugs

Reply via email to